From c7941f0abf03bcb08218381b2c3c0b23e93c628f Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Sat, 27 May 2017 10:44:10 +0800 Subject: [PATCH 01/34] Fix Pooling bwd executed using non-optimized layout for diff data. --- src/caffe/layers/mkldnn_pooling_layer.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/caffe/layers/mkldnn_pooling_layer.cpp b/src/caffe/layers/mkldnn_pooling_layer.cpp index 6bce42a1c..5ea0ae968 100644 --- a/src/caffe/layers/mkldnn_pooling_layer.cpp +++ b/src/caffe/layers/mkldnn_pooling_layer.cpp @@ -364,6 +364,18 @@ void MKLDNNPoolingLayer::InitPoolingBwd(const vector*>& top = get_mkldnn_prv_descriptor(top[0]); bwd_cmfmt = static_cast(mem_descr->prv_memory_pd()->desc().data.format); } + + bool bottom_data_is_prv = (const_cast(bottom[0]->prv_data()) != NULL); + if (bottom_data_is_prv) { + shared_ptr > mem_descr + = get_mkldnn_prv_descriptor(bottom[0]); + memory::format fwd_prv_bottom_data_mfmt = static_cast(mem_descr->prv_memory_pd()->desc().data.format); +#ifdef DEBUG + LOG(INFO) << "MKLDNNReLULayer::InitPoolingBwd: memory format of prv bottom data is: " << fwd_prv_bottom_data_mfmt; + LOG(INFO) << "MKLDNNReLULayer::InitPoolingBwd: Reorder the top and bottom diff to the format of prv bottom data! (Performance consideration)"; +#endif + bwd_cmfmt = fwd_prv_bottom_data_mfmt; + } shared_ptr init_bwd_bottom_md(new memory::desc({bottom_tz}, mpcsn, bwd_cmfmt)); shared_ptr init_bwd_top_md(new memory::desc({top_tz}, mpcsn, bwd_cmfmt)); From 211915e7561d3ea74aae88a7b6fbcfa71758530c Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Sat, 27 May 2017 15:21:37 +0800 Subject: [PATCH 02/34] Further improve the Relu performance when prv top diff format is different from prv bottom data format. 
--- src/caffe/layers/mkldnn_relu_layer.cpp | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/caffe/layers/mkldnn_relu_layer.cpp b/src/caffe/layers/mkldnn_relu_layer.cpp index 7b6cb2e06..273e834d8 100644 --- a/src/caffe/layers/mkldnn_relu_layer.cpp +++ b/src/caffe/layers/mkldnn_relu_layer.cpp @@ -188,13 +188,31 @@ void MKLDNNReLULayer::InitReLUBwd(const vector*>& top if (top_diff_is_prv) { shared_ptr > mem_descr = get_mkldnn_prv_descriptor(top[0]); -#ifdef DEBUG memory::format bwd_prv_top_diff_mfmt = static_cast(mem_descr->prv_memory_pd()->desc().data.format); +#ifdef DEBUG LOG(INFO) << "MKLDNNReLULayer::InitReLUBwd: memory format of prv top diff is: " << bwd_prv_top_diff_mfmt; #endif top_diff_md.reset(new memory::desc(mem_descr->prv_memory_pd()->desc())); usr_diff_mpd = mem_descr->usr_memory_pd(); prv_diff_mpd = mem_descr->prv_memory_pd(); + + bool bottom_data_is_prv = (const_cast(bottom[0]->prv_data()) != NULL); + if (bottom_data_is_prv) { + shared_ptr > mem_descr + = get_mkldnn_prv_descriptor(bottom[0]); + memory::format fwd_prv_bottom_data_mfmt = static_cast(mem_descr->prv_memory_pd()->desc().data.format); +#ifdef DEBUG + LOG(INFO) << "MKLDNNReLULayer::InitReLUBwd: memory format of prv bottom data is: " << fwd_prv_bottom_data_mfmt; +#endif + if (bwd_prv_top_diff_mfmt != fwd_prv_bottom_data_mfmt) + { +#ifdef DEBUG + LOG(INFO) << "MKLDNNReLULayer::InitReLUBwd: Reorder the prv top/bottom diff to the format of prv bottom data! 
(Performance consideration)"; +#endif + prv_diff_mpd = mem_descr->prv_memory_pd(); + } + //top[0]->set_prv_diff_descriptor(NULL); + } } else { bool bottom_data_is_prv = (const_cast(bottom[0]->prv_data()) != NULL); if (bottom_data_is_prv) { @@ -203,7 +221,7 @@ void MKLDNNReLULayer::InitReLUBwd(const vector*>& top #ifdef DEBUG memory::format fwd_prv_bottom_data_mfmt = static_cast(mem_descr->prv_memory_pd()->desc().data.format); LOG(INFO) << "MKLDNNReLULayer::InitReLUBwd: memory format of prv bottom data is: " << fwd_prv_bottom_data_mfmt; - LOG(INFO) << "MKLDNNReLULayer::InitReLUBwd: Reorder the usr top diff to the format of prv bottom data! (Performance consideration)"; + LOG(INFO) << "MKLDNNReLULayer::InitReLUBwd: Reorder the usr top/bottom diff to the format of prv bottom data! (Performance consideration)"; #endif prv_diff_mpd = mem_descr->prv_memory_pd(); //top[0]->prv_data() is empty, however top[0]->get_prv_diff_descriptor() has value. From 845985ed77d06064b8b882e7051e3871d7e2e69c Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Thu, 1 Jun 2017 11:28:17 +0800 Subject: [PATCH 03/34] Optimize the LRN layer bwd using optimized layout for bottom diff data. 
--- src/caffe/layers/mkldnn_lrn_layer.cpp | 38 ++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/src/caffe/layers/mkldnn_lrn_layer.cpp b/src/caffe/layers/mkldnn_lrn_layer.cpp index 48ea4e884..d56854a41 100644 --- a/src/caffe/layers/mkldnn_lrn_layer.cpp +++ b/src/caffe/layers/mkldnn_lrn_layer.cpp @@ -256,11 +256,47 @@ void MKLDNNLRNLayer::InitLRNBwd(const vector*>& top if (top_diff_is_prv) { shared_ptr > mem_descr = get_mkldnn_prv_descriptor(top[0]); + memory::format bwd_prv_top_diff_mfmt = static_cast(mem_descr->prv_memory_pd()->desc().data.format); +#ifdef DEBUG + LOG(INFO) << "MKLDNNReLULayer::InitLRNBwd: memory format of prv top diff is: " << bwd_prv_top_diff_mfmt; +#endif top_diff_md.reset(new memory::desc(mem_descr->prv_memory_pd()->desc())); usr_diff_mpd = mem_descr->usr_memory_pd(); prv_diff_mpd = mem_descr->prv_memory_pd(); + + bool bottom_data_is_prv = (const_cast(bottom[0]->prv_data()) != NULL); + if (bottom_data_is_prv) { + shared_ptr > mem_descr + = get_mkldnn_prv_descriptor(bottom[0]); + memory::format fwd_prv_bottom_data_mfmt = static_cast(mem_descr->prv_memory_pd()->desc().data.format); +#ifdef DEBUG + LOG(INFO) << "MKLDNNReLULayer::InitLRNBwd: memory format of prv bottom data is: " << fwd_prv_bottom_data_mfmt; +#endif + if (bwd_prv_top_diff_mfmt != fwd_prv_bottom_data_mfmt) + { +#ifdef DEBUG + LOG(INFO) << "MKLDNNReLULayer::InitLRNBwd: Reorder the prv top/bottom diff to the format of prv bottom data! 
(Performance consideration)"; +#endif + top_diff_md.reset(new memory::desc({tz}, mpcsn, fwd_prv_bottom_data_mfmt)); + } + //top[0]->set_prv_diff_descriptor(NULL); + } } else { - top_diff_md.reset(new memory::desc({tz}, mpcsn, memory::format::nchw)); + memory::format bwd_cmfmt = memory::format::nchw; + bool bottom_data_is_prv = (const_cast(bottom[0]->prv_data()) != NULL); + if (bottom_data_is_prv) { + shared_ptr > mem_descr + = get_mkldnn_prv_descriptor(bottom[0]); + memory::format fwd_prv_bottom_data_mfmt = static_cast(mem_descr->prv_memory_pd()->desc().data.format); +#ifdef DEBUG + LOG(INFO) << "MKLDNNReLULayer::InitLRNBwd: memory format of prv bottom data is: " << fwd_prv_bottom_data_mfmt; + LOG(INFO) << "MKLDNNReLULayer::InitLRNBwd: Reorder the usr top/bottom diff to the format of prv bottom data! (Performance consideration)"; +#endif + bwd_cmfmt = fwd_prv_bottom_data_mfmt; + //top[0]->set_prv_diff_descriptor(NULL); + } + + top_diff_md.reset(new memory::desc({tz}, mpcsn, bwd_cmfmt)); usr_diff_mpd.reset(new memory::primitive_desc(*top_diff_md, cpu_engine)); } bottom_diff_md = top_diff_md; From e2e7332bb6978a3bbd8bf2a0397ca5e2b9546670 Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Sat, 3 Jun 2017 18:31:05 +0800 Subject: [PATCH 04/34] Fix Googlenet v3 Caffe time test aborted with MKLDNN engine. 
--- .../googlenet_v3/mkl/lmdb_solver.prototxt | 16 + .../googlenet_v3/mkl/lmdb_train_val.prototxt | 3877 +++++++++++++++++ .../googlenet_v3/mkl/nodata_solver.prototxt | 16 + .../mkl/nodata_train_val.prototxt | 3860 ++++++++++++++++ .../googlenet_v3/mkldnn/lmdb_solver.prototxt | 16 + .../mkldnn/lmdb_train_val.prototxt | 3594 +++++++++++++++ .../mkldnn/nodata_solver.prototxt | 16 + .../mkldnn/nodata_train_val.prototxt | 3577 +++++++++++++++ 8 files changed, 14972 insertions(+) create mode 100644 models/intel_optimized_models/googlenet_v3/mkl/lmdb_solver.prototxt create mode 100644 models/intel_optimized_models/googlenet_v3/mkl/lmdb_train_val.prototxt create mode 100644 models/intel_optimized_models/googlenet_v3/mkl/nodata_solver.prototxt create mode 100644 models/intel_optimized_models/googlenet_v3/mkl/nodata_train_val.prototxt create mode 100644 models/intel_optimized_models/googlenet_v3/mkldnn/lmdb_solver.prototxt create mode 100644 models/intel_optimized_models/googlenet_v3/mkldnn/lmdb_train_val.prototxt create mode 100644 models/intel_optimized_models/googlenet_v3/mkldnn/nodata_solver.prototxt create mode 100644 models/intel_optimized_models/googlenet_v3/mkldnn/nodata_train_val.prototxt diff --git a/models/intel_optimized_models/googlenet_v3/mkl/lmdb_solver.prototxt b/models/intel_optimized_models/googlenet_v3/mkl/lmdb_solver.prototxt new file mode 100644 index 000000000..2f9685402 --- /dev/null +++ b/models/intel_optimized_models/googlenet_v3/mkl/lmdb_solver.prototxt @@ -0,0 +1,16 @@ +net: "models/intel_optimized_models/googlenet_v3/mkl/lmdb_train_val.prototxt" +test_iter: 1000 +test_interval: 10000 +test_initialization: false +display: 40 +average_loss: 40 +base_lr: 0.045 +lr_policy: "step" +stepsize: 6400 +gamma: 0.96 +max_iter: 1200000 +momentum: 0.9 +weight_decay: 0.0002 +snapshot: 50000 +snapshot_prefix: "models/intel_optimized_models/googlenet_v3/mkl/googlenet_v3_mkl_lmdb" +solver_mode: CPU diff --git 
a/models/intel_optimized_models/googlenet_v3/mkl/lmdb_train_val.prototxt b/models/intel_optimized_models/googlenet_v3/mkl/lmdb_train_val.prototxt new file mode 100644 index 000000000..d6d99f721 --- /dev/null +++ b/models/intel_optimized_models/googlenet_v3/mkl/lmdb_train_val.prototxt @@ -0,0 +1,3877 @@ +name: "InceptionV3" + +layer { + top: "data" + top: "label" + name: "data" + type: "Data" + data_param { + source: "/data/LMDB_300px/ilsvrc12_train_lmdb" + batch_size: 22 + backend: LMDB +# shuffle: true + } + include { + phase: TRAIN + } + transform_param { + mirror: true + crop_size: 299 +# mean_file: "data/ilsvrc12/imagenet_mean.binaryproto" + mean_value: 104 + mean_value: 117 + mean_value: 123 + } +} +### Validation Set +layer { + top: "data" + top: "label" + name: "data" + type: "Data" + data_param { + source: "/data/LMDB_300px/ilsvrc12_val_lmdb" + batch_size: 50 + backend: LMDB + } + include { + phase: TEST + } + transform_param { + mirror: false + crop_size: 299 +# mean_file: "data/ilsvrc12/imagenet_mean.binaryproto" + mean_value: 104 + mean_value: 117 + mean_value: 123 + } +} +#-------------------- + + +layer { + name: "conv_conv2d" + type: "Convolution" + bottom: "data" + top: "conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 32 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "conv_batchnorm" + type: "BatchNorm" + bottom: "conv_conv2d" + top: "conv_conv2d_bn" + + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "conv_relu" + type: "ReLU" + bottom: "conv_conv2d_bn" + top: "conv_conv2d_relu" +} +layer { + name: "conv_1_1_conv2d" + type: "Convolution" + bottom: "conv_conv2d_relu" + top: "conv_1_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 32 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + 
name: "conv_1_1_batchnorm" + type: "BatchNorm" + bottom: "conv_1_1_conv2d" + top: "conv_1_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "conv_1_1_relu" + type: "ReLU" + bottom: "conv_1_1_conv2d_bn" + top: "conv_1_1_conv2d_relu" +} +layer { + name: "conv_2_2_conv2d" + type: "Convolution" + bottom: "conv_1_1_conv2d_relu" + top: "conv_2_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "conv_2_2_batchnorm" + type: "BatchNorm" + bottom: "conv_2_2_conv2d" + top: "conv_2_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "conv_2_2_relu" + type: "ReLU" + bottom: "conv_2_2_conv2d_bn" + top: "conv_2_2_conv2d_relu" +} +layer { + name: "pool" + type: "Pooling" + bottom: "conv_2_2_conv2d_relu" + top: "pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 0 + } +} +layer { + name: "conv_3_3_conv2d" + type: "Convolution" + bottom: "pool" + top: "conv_3_3_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 80 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "conv_3_3_batchnorm" + type: "BatchNorm" + bottom: "conv_3_3_conv2d" + top: "conv_3_3_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "conv_3_3_relu" + type: "ReLU" + bottom: "conv_3_3_conv2d_bn" + top: "conv_3_3_conv2d_relu" +} +layer { + name: "conv_4_4_conv2d" + type: "Convolution" + bottom: "conv_3_3_conv2d_relu" + top: "conv_4_4_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "conv_4_4_batchnorm" + type: "BatchNorm" + bottom: "conv_4_4_conv2d" + 
top: "conv_4_4_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "conv_4_4_relu" + type: "ReLU" + bottom: "conv_4_4_conv2d_bn" + top: "conv_4_4_conv2d_relu" +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv_4_4_conv2d_relu" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 0 + } +} +layer { + name: "mixed_conv_conv2d" + type: "Convolution" + bottom: "pool1" + top: "mixed_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_conv_conv2d" + top: "mixed_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_conv_relu" + type: "ReLU" + bottom: "mixed_conv_conv2d_bn" + top: "mixed_conv_conv2d_relu" +} +layer { + name: "mixed_tower_conv_conv2d" + type: "Convolution" + bottom: "pool1" + top: "mixed_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 48 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_tower_conv_conv2d" + top: "mixed_tower_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_tower_conv_relu" + type: "ReLU" + bottom: "mixed_tower_conv_conv2d_bn" + top: "mixed_tower_conv_conv2d_relu" +} +layer { + name: "mixed_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_tower_conv_conv2d_relu" + top: "mixed_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_tower_conv_1_batchnorm" + type: "BatchNorm" + 
bottom: "mixed_tower_conv_1_conv2d" + top: "mixed_tower_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_tower_conv_1_conv2d_bn" + top: "mixed_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_tower_1_conv_conv2d" + type: "Convolution" + bottom: "pool1" + top: "mixed_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_tower_1_conv_conv2d" + top: "mixed_tower_1_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_tower_1_conv_conv2d_bn" + top: "mixed_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_tower_1_conv_conv2d_relu" + top: "mixed_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_tower_1_conv_1_conv2d" + top: "mixed_tower_1_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_tower_1_conv_1_conv2d_bn" + top: "mixed_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_tower_1_conv_1_conv2d_relu" + top: "mixed_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: 
"mixed_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_tower_1_conv_2_conv2d" + top: "mixed_tower_1_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_tower_1_conv_2_conv2d_bn" + top: "mixed_tower_1_conv_2_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_pool" + type: "Pooling" + bottom: "pool1" + top: "AVE_pool_mixed_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_pool" + top: "mixed_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 32 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_tower_2_conv_conv2d" + top: "mixed_tower_2_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_tower_2_conv_conv2d_bn" + top: "mixed_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_chconcat" + type: "Concat" + bottom: "mixed_conv_conv2d_relu" + bottom: "mixed_tower_conv_1_conv2d_relu" + bottom: "mixed_tower_1_conv_2_conv2d_relu" + bottom: "mixed_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_chconcat" + top: "mixed_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_conv_conv2d" + top: "mixed_1_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: 
"mixed_1_conv_relu" + type: "ReLU" + bottom: "mixed_1_conv_conv2d_bn" + top: "mixed_1_conv_conv2d_relu" +} +layer { + name: "mixed_1_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_chconcat" + top: "mixed_1_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 48 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_tower_conv_conv2d" + top: "mixed_1_tower_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_1_tower_conv_relu" + type: "ReLU" + bottom: "mixed_1_tower_conv_conv2d_bn" + top: "mixed_1_tower_conv_conv2d_relu" +} +layer { + name: "mixed_1_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_1_tower_conv_conv2d_relu" + top: "mixed_1_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_tower_conv_1_conv2d" + top: "mixed_1_tower_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_1_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_1_tower_conv_1_conv2d_bn" + top: "mixed_1_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_1_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_chconcat" + top: "mixed_1_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_tower_1_conv_conv2d" + top: "mixed_1_tower_1_conv_conv2d_bn" + batch_norm_param { + 
engine: MKL2017 + } +} +layer { + name: "mixed_1_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_1_tower_1_conv_conv2d_bn" + top: "mixed_1_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_1_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_1_tower_1_conv_conv2d_relu" + top: "mixed_1_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_tower_1_conv_1_conv2d" + top: "mixed_1_tower_1_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_1_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_1_tower_1_conv_1_conv2d_bn" + top: "mixed_1_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_1_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_1_tower_1_conv_1_conv2d_relu" + top: "mixed_1_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_tower_1_conv_2_conv2d" + top: "mixed_1_tower_1_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_1_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_1_tower_1_conv_2_conv2d_bn" + top: "mixed_1_tower_1_conv_2_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_1_pool" + type: "Pooling" + bottom: "ch_concat_mixed_chconcat" + top: "AVE_pool_mixed_1_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_1_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_1_pool" + top: "mixed_1_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + 
convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_tower_2_conv_conv2d" + top: "mixed_1_tower_2_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_1_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_1_tower_2_conv_conv2d_bn" + top: "mixed_1_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_1_chconcat" + type: "Concat" + bottom: "mixed_1_conv_conv2d_relu" + bottom: "mixed_1_tower_conv_1_conv2d_relu" + bottom: "mixed_1_tower_1_conv_2_conv2d_relu" + bottom: "mixed_1_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_1_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_2_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_1_chconcat" + top: "mixed_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_conv_conv2d" + top: "mixed_2_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_2_conv_relu" + type: "ReLU" + bottom: "mixed_2_conv_conv2d_bn" + top: "mixed_2_conv_conv2d_relu" +} +layer { + name: "mixed_2_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_1_chconcat" + top: "mixed_2_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 48 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_conv_conv2d" + top: "mixed_2_tower_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: 
"mixed_2_tower_conv_relu" + type: "ReLU" + bottom: "mixed_2_tower_conv_conv2d_bn" + top: "mixed_2_tower_conv_conv2d_relu" +} +layer { + name: "mixed_2_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_2_tower_conv_conv2d_relu" + top: "mixed_2_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_conv_1_conv2d" + top: "mixed_2_tower_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_2_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_2_tower_conv_1_conv2d_bn" + top: "mixed_2_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_2_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_1_chconcat" + top: "mixed_2_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_1_conv_conv2d" + top: "mixed_2_tower_1_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_2_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_2_tower_1_conv_conv2d_bn" + top: "mixed_2_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_2_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_2_tower_1_conv_conv2d_relu" + top: "mixed_2_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_1_conv_1_conv2d" + top: 
"mixed_2_tower_1_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_2_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_2_tower_1_conv_1_conv2d_bn" + top: "mixed_2_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_2_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_2_tower_1_conv_1_conv2d_relu" + top: "mixed_2_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_1_conv_2_conv2d" + top: "mixed_2_tower_1_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_2_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_2_tower_1_conv_2_conv2d_bn" + top: "mixed_2_tower_1_conv_2_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_2_pool" + type: "Pooling" + bottom: "ch_concat_mixed_1_chconcat" + top: "AVE_pool_mixed_2_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_2_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_2_pool" + top: "mixed_2_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_2_conv_conv2d" + top: "mixed_2_tower_2_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_2_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_2_tower_2_conv_conv2d_bn" + top: "mixed_2_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_2_chconcat" + type: "Concat" + bottom: "mixed_2_conv_conv2d_relu" + bottom: "mixed_2_tower_conv_1_conv2d_relu" + bottom: 
"mixed_2_tower_1_conv_2_conv2d_relu" + bottom: "mixed_2_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_2_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_3_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_2_chconcat" + top: "mixed_3_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_3_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_3_conv_conv2d" + top: "mixed_3_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_3_conv_relu" + type: "ReLU" + bottom: "mixed_3_conv_conv2d_bn" + top: "mixed_3_conv_conv2d_relu" +} +layer { + name: "mixed_3_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_2_chconcat" + top: "mixed_3_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_3_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_3_tower_conv_conv2d" + top: "mixed_3_tower_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_3_tower_conv_relu" + type: "ReLU" + bottom: "mixed_3_tower_conv_conv2d_bn" + top: "mixed_3_tower_conv_conv2d_relu" +} +layer { + name: "mixed_3_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_3_tower_conv_conv2d_relu" + top: "mixed_3_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_3_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_3_tower_conv_1_conv2d" + top: "mixed_3_tower_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + 
} +} +layer { + name: "mixed_3_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_3_tower_conv_1_conv2d_bn" + top: "mixed_3_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_3_tower_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_3_tower_conv_1_conv2d_relu" + top: "mixed_3_tower_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_3_tower_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_3_tower_conv_2_conv2d" + top: "mixed_3_tower_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_3_tower_conv_2_relu" + type: "ReLU" + bottom: "mixed_3_tower_conv_2_conv2d_bn" + top: "mixed_3_tower_conv_2_conv2d_relu" +} +layer { + name: "max_pool_mixed_3_pool" + type: "Pooling" + bottom: "ch_concat_mixed_2_chconcat" + top: "max_pool_mixed_3_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 0 + } +} +layer { + name: "ch_concat_mixed_3_chconcat" + type: "Concat" + bottom: "max_pool_mixed_3_pool" + bottom: "mixed_3_conv_conv2d_relu" + bottom: "mixed_3_tower_conv_2_conv2d_relu" + top: "ch_concat_mixed_3_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_4_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_3_chconcat" + top: "mixed_4_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_4_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_conv_conv2d" + top: "mixed_4_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_4_conv_relu" + type: "ReLU" + bottom: "mixed_4_conv_conv2d_bn" + top: "mixed_4_conv_conv2d_relu" +} +layer { + name: "mixed_4_tower_conv_conv2d" + type: "Convolution" 
+ bottom: "ch_concat_mixed_3_chconcat" + top: "mixed_4_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_4_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_conv_conv2d" + top: "mixed_4_tower_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_4_tower_conv_relu" + type: "ReLU" + bottom: "mixed_4_tower_conv_conv2d_bn" + top: "mixed_4_tower_conv_conv2d_relu" +} +layer { + name: "mixed_4_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_4_tower_conv_conv2d_relu" + top: "mixed_4_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_4_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_conv_1_conv2d" + top: "mixed_4_tower_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_4_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_4_tower_conv_1_conv2d_bn" + top: "mixed_4_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_4_tower_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_4_tower_conv_1_conv2d_relu" + top: "mixed_4_tower_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_4_tower_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_conv_2_conv2d" + top: "mixed_4_tower_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_4_tower_conv_2_relu" + type: "ReLU" + bottom: 
"mixed_4_tower_conv_2_conv2d_bn" + top: "mixed_4_tower_conv_2_conv2d_relu" +} +layer { + name: "mixed_4_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_3_chconcat" + top: "mixed_4_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_4_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_1_conv_conv2d" + top: "mixed_4_tower_1_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_4_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_4_tower_1_conv_conv2d_bn" + top: "mixed_4_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_4_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_4_tower_1_conv_conv2d_relu" + top: "mixed_4_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_4_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_1_conv_1_conv2d" + top: "mixed_4_tower_1_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_4_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_4_tower_1_conv_1_conv2d_bn" + top: "mixed_4_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_4_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_4_tower_1_conv_1_conv2d_relu" + top: "mixed_4_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_4_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: 
"mixed_4_tower_1_conv_2_conv2d" + top: "mixed_4_tower_1_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_4_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_4_tower_1_conv_2_conv2d_bn" + top: "mixed_4_tower_1_conv_2_conv2d_relu" +} +layer { + name: "mixed_4_tower_1_conv_3_conv2d" + type: "Convolution" + bottom: "mixed_4_tower_1_conv_2_conv2d_relu" + top: "mixed_4_tower_1_conv_3_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_4_tower_1_conv_3_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_1_conv_3_conv2d" + top: "mixed_4_tower_1_conv_3_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_4_tower_1_conv_3_relu" + type: "ReLU" + bottom: "mixed_4_tower_1_conv_3_conv2d_bn" + top: "mixed_4_tower_1_conv_3_conv2d_relu" +} +layer { + name: "mixed_4_tower_1_conv_4_conv2d" + type: "Convolution" + bottom: "mixed_4_tower_1_conv_3_conv2d_relu" + top: "mixed_4_tower_1_conv_4_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_4_tower_1_conv_4_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_1_conv_4_conv2d" + top: "mixed_4_tower_1_conv_4_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_4_tower_1_conv_4_relu" + type: "ReLU" + bottom: "mixed_4_tower_1_conv_4_conv2d_bn" + top: "mixed_4_tower_1_conv_4_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_4_pool" + type: "Pooling" + bottom: "ch_concat_mixed_3_chconcat" + top: "AVE_pool_mixed_4_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: 
"mixed_4_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_4_pool" + top: "mixed_4_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_4_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_2_conv_conv2d" + top: "mixed_4_tower_2_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_4_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_4_tower_2_conv_conv2d_bn" + top: "mixed_4_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_4_chconcat" + type: "Concat" + bottom: "mixed_4_conv_conv2d_relu" + bottom: "mixed_4_tower_conv_2_conv2d_relu" + bottom: "mixed_4_tower_1_conv_4_conv2d_relu" + bottom: "mixed_4_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_4_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_5_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_4_chconcat" + top: "mixed_5_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_5_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_conv_conv2d" + top: "mixed_5_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_5_conv_relu" + type: "ReLU" + bottom: "mixed_5_conv_conv2d_bn" + top: "mixed_5_conv_conv2d_relu" +} +layer { + name: "mixed_5_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_4_chconcat" + top: "mixed_5_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_5_tower_conv_batchnorm" 
+ type: "BatchNorm" + bottom: "mixed_5_tower_conv_conv2d" + top: "mixed_5_tower_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_5_tower_conv_relu" + type: "ReLU" + bottom: "mixed_5_tower_conv_conv2d_bn" + top: "mixed_5_tower_conv_conv2d_relu" +} +layer { + name: "mixed_5_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_5_tower_conv_conv2d_relu" + top: "mixed_5_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_5_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_conv_1_conv2d" + top: "mixed_5_tower_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_5_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_5_tower_conv_1_conv2d_bn" + top: "mixed_5_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_5_tower_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_5_tower_conv_1_conv2d_relu" + top: "mixed_5_tower_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_5_tower_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_conv_2_conv2d" + top: "mixed_5_tower_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_5_tower_conv_2_relu" + type: "ReLU" + bottom: "mixed_5_tower_conv_2_conv2d_bn" + top: "mixed_5_tower_conv_2_conv2d_relu" +} +layer { + name: "mixed_5_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_4_chconcat" + top: "mixed_5_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + pad: 0 + 
kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_5_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_1_conv_conv2d" + top: "mixed_5_tower_1_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_5_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_5_tower_1_conv_conv2d_bn" + top: "mixed_5_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_5_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_5_tower_1_conv_conv2d_relu" + top: "mixed_5_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_5_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_1_conv_1_conv2d" + top: "mixed_5_tower_1_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_5_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_5_tower_1_conv_1_conv2d_bn" + top: "mixed_5_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_5_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_5_tower_1_conv_1_conv2d_relu" + top: "mixed_5_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_5_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_1_conv_2_conv2d" + top: "mixed_5_tower_1_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_5_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_5_tower_1_conv_2_conv2d_bn" + top: "mixed_5_tower_1_conv_2_conv2d_relu" +} +layer { + name: "mixed_5_tower_1_conv_3_conv2d" + type: "Convolution" + bottom: 
"mixed_5_tower_1_conv_2_conv2d_relu" + top: "mixed_5_tower_1_conv_3_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_5_tower_1_conv_3_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_1_conv_3_conv2d" + top: "mixed_5_tower_1_conv_3_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_5_tower_1_conv_3_relu" + type: "ReLU" + bottom: "mixed_5_tower_1_conv_3_conv2d_bn" + top: "mixed_5_tower_1_conv_3_conv2d_relu" +} +layer { + name: "mixed_5_tower_1_conv_4_conv2d" + type: "Convolution" + bottom: "mixed_5_tower_1_conv_3_conv2d_relu" + top: "mixed_5_tower_1_conv_4_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_5_tower_1_conv_4_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_1_conv_4_conv2d" + top: "mixed_5_tower_1_conv_4_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_5_tower_1_conv_4_relu" + type: "ReLU" + bottom: "mixed_5_tower_1_conv_4_conv2d_bn" + top: "mixed_5_tower_1_conv_4_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_5_pool" + type: "Pooling" + bottom: "ch_concat_mixed_4_chconcat" + top: "AVE_pool_mixed_5_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_5_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_5_pool" + top: "mixed_5_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_5_tower_2_conv_batchnorm" 
+ type: "BatchNorm" + bottom: "mixed_5_tower_2_conv_conv2d" + top: "mixed_5_tower_2_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_5_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_5_tower_2_conv_conv2d_bn" + top: "mixed_5_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_5_chconcat" + type: "Concat" + bottom: "mixed_5_conv_conv2d_relu" + bottom: "mixed_5_tower_conv_2_conv2d_relu" + bottom: "mixed_5_tower_1_conv_4_conv2d_relu" + bottom: "mixed_5_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_5_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_6_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_5_chconcat" + top: "mixed_6_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_6_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_conv_conv2d" + top: "mixed_6_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_6_conv_relu" + type: "ReLU" + bottom: "mixed_6_conv_conv2d_bn" + top: "mixed_6_conv_conv2d_relu" +} +layer { + name: "mixed_6_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_5_chconcat" + top: "mixed_6_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_6_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_conv_conv2d" + top: "mixed_6_tower_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_6_tower_conv_relu" + type: "ReLU" + bottom: "mixed_6_tower_conv_conv2d_bn" + top: "mixed_6_tower_conv_conv2d_relu" +} +layer { + name: "mixed_6_tower_conv_1_conv2d" + type: "Convolution" + bottom: 
"mixed_6_tower_conv_conv2d_relu" + top: "mixed_6_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_6_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_conv_1_conv2d" + top: "mixed_6_tower_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_6_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_6_tower_conv_1_conv2d_bn" + top: "mixed_6_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_6_tower_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_6_tower_conv_1_conv2d_relu" + top: "mixed_6_tower_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_6_tower_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_conv_2_conv2d" + top: "mixed_6_tower_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_6_tower_conv_2_relu" + type: "ReLU" + bottom: "mixed_6_tower_conv_2_conv2d_bn" + top: "mixed_6_tower_conv_2_conv2d_relu" +} +layer { + name: "mixed_6_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_5_chconcat" + top: "mixed_6_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_6_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_1_conv_conv2d" + top: "mixed_6_tower_1_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_6_tower_1_conv_relu" + type: "ReLU" + bottom: 
"mixed_6_tower_1_conv_conv2d_bn" + top: "mixed_6_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_6_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_6_tower_1_conv_conv2d_relu" + top: "mixed_6_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_6_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_1_conv_1_conv2d" + top: "mixed_6_tower_1_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_6_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_6_tower_1_conv_1_conv2d_bn" + top: "mixed_6_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_6_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_6_tower_1_conv_1_conv2d_relu" + top: "mixed_6_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_6_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_1_conv_2_conv2d" + top: "mixed_6_tower_1_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_6_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_6_tower_1_conv_2_conv2d_bn" + top: "mixed_6_tower_1_conv_2_conv2d_relu" +} +layer { + name: "mixed_6_tower_1_conv_3_conv2d" + type: "Convolution" + bottom: "mixed_6_tower_1_conv_2_conv2d_relu" + top: "mixed_6_tower_1_conv_3_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_6_tower_1_conv_3_batchnorm" + 
type: "BatchNorm" + bottom: "mixed_6_tower_1_conv_3_conv2d" + top: "mixed_6_tower_1_conv_3_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_6_tower_1_conv_3_relu" + type: "ReLU" + bottom: "mixed_6_tower_1_conv_3_conv2d_bn" + top: "mixed_6_tower_1_conv_3_conv2d_relu" +} +layer { + name: "mixed_6_tower_1_conv_4_conv2d" + type: "Convolution" + bottom: "mixed_6_tower_1_conv_3_conv2d_relu" + top: "mixed_6_tower_1_conv_4_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_6_tower_1_conv_4_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_1_conv_4_conv2d" + top: "mixed_6_tower_1_conv_4_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_6_tower_1_conv_4_relu" + type: "ReLU" + bottom: "mixed_6_tower_1_conv_4_conv2d_bn" + top: "mixed_6_tower_1_conv_4_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_6_pool" + type: "Pooling" + bottom: "ch_concat_mixed_5_chconcat" + top: "AVE_pool_mixed_6_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_6_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_6_pool" + top: "mixed_6_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_6_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_2_conv_conv2d" + top: "mixed_6_tower_2_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_6_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_6_tower_2_conv_conv2d_bn" + top: "mixed_6_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_6_chconcat" + type: "Concat" + 
bottom: "mixed_6_conv_conv2d_relu" + bottom: "mixed_6_tower_conv_2_conv2d_relu" + bottom: "mixed_6_tower_1_conv_4_conv2d_relu" + bottom: "mixed_6_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_6_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_7_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_6_chconcat" + top: "mixed_7_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_7_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_conv_conv2d" + top: "mixed_7_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_7_conv_relu" + type: "ReLU" + bottom: "mixed_7_conv_conv2d_bn" + top: "mixed_7_conv_conv2d_relu" +} +layer { + name: "mixed_7_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_6_chconcat" + top: "mixed_7_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_7_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_conv_conv2d" + top: "mixed_7_tower_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_7_tower_conv_relu" + type: "ReLU" + bottom: "mixed_7_tower_conv_conv2d_bn" + top: "mixed_7_tower_conv_conv2d_relu" +} +layer { + name: "mixed_7_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_7_tower_conv_conv2d_relu" + top: "mixed_7_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_7_tower_conv_1_batchnorm" + type: "BatchNorm" + 
bottom: "mixed_7_tower_conv_1_conv2d" + top: "mixed_7_tower_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_7_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_7_tower_conv_1_conv2d_bn" + top: "mixed_7_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_7_tower_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_7_tower_conv_1_conv2d_relu" + top: "mixed_7_tower_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_7_tower_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_conv_2_conv2d" + top: "mixed_7_tower_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_7_tower_conv_2_relu" + type: "ReLU" + bottom: "mixed_7_tower_conv_2_conv2d_bn" + top: "mixed_7_tower_conv_2_conv2d_relu" +} +layer { + name: "mixed_7_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_6_chconcat" + top: "mixed_7_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_7_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_1_conv_conv2d" + top: "mixed_7_tower_1_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_7_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_7_tower_1_conv_conv2d_bn" + top: "mixed_7_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_7_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_7_tower_1_conv_conv2d_relu" + top: "mixed_7_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + 
std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_7_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_1_conv_1_conv2d" + top: "mixed_7_tower_1_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_7_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_7_tower_1_conv_1_conv2d_bn" + top: "mixed_7_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_7_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_7_tower_1_conv_1_conv2d_relu" + top: "mixed_7_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_7_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_1_conv_2_conv2d" + top: "mixed_7_tower_1_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_7_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_7_tower_1_conv_2_conv2d_bn" + top: "mixed_7_tower_1_conv_2_conv2d_relu" +} +layer { + name: "mixed_7_tower_1_conv_3_conv2d" + type: "Convolution" + bottom: "mixed_7_tower_1_conv_2_conv2d_relu" + top: "mixed_7_tower_1_conv_3_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_7_tower_1_conv_3_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_1_conv_3_conv2d" + top: "mixed_7_tower_1_conv_3_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_7_tower_1_conv_3_relu" + type: "ReLU" + bottom: "mixed_7_tower_1_conv_3_conv2d_bn" + top: "mixed_7_tower_1_conv_3_conv2d_relu" +} +layer { + name: "mixed_7_tower_1_conv_4_conv2d" + type: "Convolution" + bottom: 
"mixed_7_tower_1_conv_3_conv2d_relu" + top: "mixed_7_tower_1_conv_4_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_7_tower_1_conv_4_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_1_conv_4_conv2d" + top: "mixed_7_tower_1_conv_4_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_7_tower_1_conv_4_relu" + type: "ReLU" + bottom: "mixed_7_tower_1_conv_4_conv2d_bn" + top: "mixed_7_tower_1_conv_4_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_7_pool" + type: "Pooling" + bottom: "ch_concat_mixed_6_chconcat" + top: "AVE_pool_mixed_7_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_7_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_7_pool" + top: "mixed_7_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_7_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_2_conv_conv2d" + top: "mixed_7_tower_2_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_7_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_7_tower_2_conv_conv2d_bn" + top: "mixed_7_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_7_chconcat" + type: "Concat" + bottom: "mixed_7_conv_conv2d_relu" + bottom: "mixed_7_tower_conv_2_conv2d_relu" + bottom: "mixed_7_tower_1_conv_4_conv2d_relu" + bottom: "mixed_7_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_7_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_8_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_7_chconcat" + top: "mixed_8_tower_conv_conv2d" 
+ param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_8_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_8_tower_conv_conv2d" + top: "mixed_8_tower_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_8_tower_conv_relu" + type: "ReLU" + bottom: "mixed_8_tower_conv_conv2d_bn" + top: "mixed_8_tower_conv_conv2d_relu" +} +layer { + name: "mixed_8_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_8_tower_conv_conv2d_relu" + top: "mixed_8_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 320 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_8_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_8_tower_conv_1_conv2d" + top: "mixed_8_tower_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_8_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_8_tower_conv_1_conv2d_bn" + top: "mixed_8_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_8_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_7_chconcat" + top: "mixed_8_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_8_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_8_tower_1_conv_conv2d" + top: "mixed_8_tower_1_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_8_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_8_tower_1_conv_conv2d_bn" + top: "mixed_8_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_8_tower_1_conv_1_conv2d" + type: "Convolution" + 
bottom: "mixed_8_tower_1_conv_conv2d_relu" + top: "mixed_8_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_8_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_8_tower_1_conv_1_conv2d" + top: "mixed_8_tower_1_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_8_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_8_tower_1_conv_1_conv2d_bn" + top: "mixed_8_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_8_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_8_tower_1_conv_1_conv2d_relu" + top: "mixed_8_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_8_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_8_tower_1_conv_2_conv2d" + top: "mixed_8_tower_1_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_8_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_8_tower_1_conv_2_conv2d_bn" + top: "mixed_8_tower_1_conv_2_conv2d_relu" +} +layer { + name: "mixed_8_tower_1_conv_3_conv2d" + type: "Convolution" + bottom: "mixed_8_tower_1_conv_2_conv2d_relu" + top: "mixed_8_tower_1_conv_3_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_8_tower_1_conv_3_batchnorm" + type: "BatchNorm" + bottom: "mixed_8_tower_1_conv_3_conv2d" + top: "mixed_8_tower_1_conv_3_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: 
"mixed_8_tower_1_conv_3_relu" + type: "ReLU" + bottom: "mixed_8_tower_1_conv_3_conv2d_bn" + top: "mixed_8_tower_1_conv_3_conv2d_relu" +} +layer { + name: "MAX_pool_mixed_8_pool" + type: "Pooling" + bottom: "ch_concat_mixed_7_chconcat" + top: "MAX_pool_mixed_8_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 0 + } +} +layer { + name: "ch_concat_mixed_8_chconcat" + type: "Concat" + bottom: "mixed_8_tower_conv_1_conv2d_relu" + bottom: "mixed_8_tower_1_conv_3_conv2d_relu" + bottom: "MAX_pool_mixed_8_pool" + top: "ch_concat_mixed_8_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_9_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_8_chconcat" + top: "mixed_9_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 320 + bias_term: false + pad: 0 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + kernel_h: 1 + kernel_w: 1 + } +} +layer { + name: "mixed_9_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_conv_conv2d" + top: "mixed_9_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_9_conv_relu" + type: "ReLU" + bottom: "mixed_9_conv_conv2d_bn" + top: "mixed_9_conv_conv2d_relu" +} +layer { + name: "mixed_9_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_8_chconcat" + top: "mixed_9_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_9_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_conv_conv2d" + top: "mixed_9_tower_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_9_tower_conv_relu" + type: "ReLU" + bottom: "mixed_9_tower_conv_conv2d_bn" + top: "mixed_9_tower_conv_conv2d_relu" +} +layer { + name: "mixed_9_tower_mixed_conv_conv2d" + type: "Convolution" + bottom: 
"mixed_9_tower_conv_conv2d_relu" + top: "mixed_9_tower_mixed_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 1 + kernel_h: 1 + kernel_w: 3 + } +} +layer { + name: "mixed_9_tower_mixed_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_mixed_conv_conv2d" + top: "mixed_9_tower_mixed_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_9_tower_mixed_conv_relu" + type: "ReLU" + bottom: "mixed_9_tower_mixed_conv_conv2d_bn" + top: "mixed_9_tower_mixed_conv_conv2d_relu" +} +layer { + name: "mixed_9_tower_mixed_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_9_tower_conv_conv2d_relu" + top: "mixed_9_tower_mixed_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 1 + pad_w: 0 + kernel_h: 3 + kernel_w: 1 + } +} +layer { + name: "mixed_9_tower_mixed_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_mixed_conv_1_conv2d" + top: "mixed_9_tower_mixed_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_9_tower_mixed_conv_1_relu" + type: "ReLU" + bottom: "mixed_9_tower_mixed_conv_1_conv2d_bn" + top: "mixed_9_tower_mixed_conv_1_conv2d_relu" +} +layer { + name: "mixed_9_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_8_chconcat" + top: "mixed_9_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 448 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_9_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_1_conv_conv2d" + top: "mixed_9_tower_1_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + 
name: "mixed_9_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_9_tower_1_conv_conv2d_bn" + top: "mixed_9_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_9_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_9_tower_1_conv_conv2d_relu" + top: "mixed_9_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_9_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_1_conv_1_conv2d" + top: "mixed_9_tower_1_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_9_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_9_tower_1_conv_1_conv2d_bn" + top: "mixed_9_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_9_tower_1_mixed_conv_conv2d" + type: "Convolution" + bottom: "mixed_9_tower_1_conv_1_conv2d_relu" + top: "mixed_9_tower_1_mixed_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 1 + kernel_h: 1 + kernel_w: 3 + } +} +layer { + name: "mixed_9_tower_1_mixed_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_1_mixed_conv_conv2d" + top: "mixed_9_tower_1_mixed_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_9_tower_1_mixed_conv_relu" + type: "ReLU" + bottom: "mixed_9_tower_1_mixed_conv_conv2d_bn" + top: "mixed_9_tower_1_mixed_conv_conv2d_relu" +} +layer { + name: "mixed_9_tower_1_mixed_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_9_tower_1_conv_1_conv2d_relu" + top: "mixed_9_tower_1_mixed_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 1 + pad_w: 0 + kernel_h: 
3 + kernel_w: 1 + } +} +layer { + name: "mixed_9_tower_1_mixed_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_1_mixed_conv_1_conv2d" + top: "mixed_9_tower_1_mixed_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_9_tower_1_mixed_conv_1_relu" + type: "ReLU" + bottom: "mixed_9_tower_1_mixed_conv_1_conv2d_bn" + top: "mixed_9_tower_1_mixed_conv_1_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_9_pool" + type: "Pooling" + bottom: "ch_concat_mixed_8_chconcat" + top: "AVE_pool_mixed_9_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_9_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_9_pool" + top: "mixed_9_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_9_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_2_conv_conv2d" + top: "mixed_9_tower_2_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_9_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_9_tower_2_conv_conv2d_bn" + top: "mixed_9_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_9_chconcat" + type: "Concat" + bottom: "mixed_9_conv_conv2d_relu" + bottom: "mixed_9_tower_mixed_conv_conv2d_relu" + bottom: "mixed_9_tower_mixed_conv_1_conv2d_relu" + bottom: "mixed_9_tower_1_mixed_conv_conv2d_relu" + bottom: "mixed_9_tower_1_mixed_conv_1_conv2d_relu" + bottom: "mixed_9_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_9_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_10_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_9_chconcat" + top: "mixed_10_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 320 + bias_term: false + pad: 0 + stride: 1 + weight_filler 
{ + type: "gaussian" + std: 0.01 + } + kernel_h: 1 + kernel_w: 1 + } +} +layer { + name: "mixed_10_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_conv_conv2d" + top: "mixed_10_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_10_conv_relu" + type: "ReLU" + bottom: "mixed_10_conv_conv2d_bn" + top: "mixed_10_conv_conv2d_relu" +} +layer { + name: "mixed_10_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_9_chconcat" + top: "mixed_10_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_10_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_conv_conv2d" + top: "mixed_10_tower_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_10_tower_conv_relu" + type: "ReLU" + bottom: "mixed_10_tower_conv_conv2d_bn" + top: "mixed_10_tower_conv_conv2d_relu" +} +layer { + name: "mixed_10_tower_mixed_conv_conv2d" + type: "Convolution" + bottom: "mixed_10_tower_conv_conv2d_relu" + top: "mixed_10_tower_mixed_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 1 + kernel_h: 1 + kernel_w: 3 + } +} +layer { + name: "mixed_10_tower_mixed_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_mixed_conv_conv2d" + top: "mixed_10_tower_mixed_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_10_tower_mixed_conv_relu" + type: "ReLU" + bottom: "mixed_10_tower_mixed_conv_conv2d_bn" + top: "mixed_10_tower_mixed_conv_conv2d_relu" +} +layer { + name: "mixed_10_tower_mixed_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_10_tower_conv_conv2d_relu" + top: "mixed_10_tower_mixed_conv_1_conv2d" + param { + 
lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 1 + pad_w: 0 + kernel_h: 3 + kernel_w: 1 + } +} +layer { + name: "mixed_10_tower_mixed_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_mixed_conv_1_conv2d" + top: "mixed_10_tower_mixed_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_10_tower_mixed_conv_1_relu" + type: "ReLU" + bottom: "mixed_10_tower_mixed_conv_1_conv2d_bn" + top: "mixed_10_tower_mixed_conv_1_conv2d_relu" +} +layer { + name: "mixed_10_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_9_chconcat" + top: "mixed_10_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 448 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_10_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_1_conv_conv2d" + top: "mixed_10_tower_1_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_10_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_10_tower_1_conv_conv2d_bn" + top: "mixed_10_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_10_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_10_tower_1_conv_conv2d_relu" + top: "mixed_10_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_10_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_1_conv_1_conv2d" + top: "mixed_10_tower_1_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_10_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_10_tower_1_conv_1_conv2d_bn" + top: 
"mixed_10_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_10_tower_1_mixed_conv_conv2d" + type: "Convolution" + bottom: "mixed_10_tower_1_conv_1_conv2d_relu" + top: "mixed_10_tower_1_mixed_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 1 + kernel_h: 1 + kernel_w: 3 + } +} +layer { + name: "mixed_10_tower_1_mixed_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_1_mixed_conv_conv2d" + top: "mixed_10_tower_1_mixed_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_10_tower_1_mixed_conv_relu" + type: "ReLU" + bottom: "mixed_10_tower_1_mixed_conv_conv2d_bn" + top: "mixed_10_tower_1_mixed_conv_conv2d_relu" +} +layer { + name: "mixed_10_tower_1_mixed_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_10_tower_1_conv_1_conv2d_relu" + top: "mixed_10_tower_1_mixed_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 1 + pad_w: 0 + kernel_h: 3 + kernel_w: 1 + } +} +layer { + name: "mixed_10_tower_1_mixed_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_1_mixed_conv_1_conv2d" + top: "mixed_10_tower_1_mixed_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_10_tower_1_mixed_conv_1_relu" + type: "ReLU" + bottom: "mixed_10_tower_1_mixed_conv_1_conv2d_bn" + top: "mixed_10_tower_1_mixed_conv_1_conv2d_relu" +} +layer { + name: "MAX_pool_mixed_10_pool" + type: "Pooling" + bottom: "ch_concat_mixed_9_chconcat" + top: "MAX_pool_mixed_10_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_10_tower_2_conv_conv2d" + type: "Convolution" + bottom: "MAX_pool_mixed_10_pool" + top: "mixed_10_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + 
decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_10_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_2_conv_conv2d" + top: "mixed_10_tower_2_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_10_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_10_tower_2_conv_conv2d_bn" + top: "mixed_10_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_10_chconcat" + type: "Concat" + bottom: "mixed_10_conv_conv2d_relu" + bottom: "mixed_10_tower_mixed_conv_conv2d_relu" + bottom: "mixed_10_tower_mixed_conv_1_conv2d_relu" + bottom: "mixed_10_tower_1_mixed_conv_conv2d_relu" + bottom: "mixed_10_tower_1_mixed_conv_1_conv2d_relu" + bottom: "mixed_10_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_10_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "global_pool" + type: "Pooling" + bottom: "ch_concat_mixed_10_chconcat" + top: "global_pool" + pooling_param { + pool: AVE + kernel_size: 8 + stride: 1 + pad: 0 + } +} +layer { + name: "drop" + type: "Dropout" + bottom: "global_pool" + top: "global_pool" + dropout_param { + dropout_ratio: 0.8 + } +} +layer { + name: "flatten" + type: "Flatten" + bottom: "global_pool" + top: "flatten" +} +layer { + name: "fc1" + type: "InnerProduct" + bottom: "flatten" + top: "fc1" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + inner_product_param { + num_output: 1000 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "loss" + type: "SoftmaxWithLoss" + bottom: "fc1" + bottom: "label" + top: "loss" +} +layer { + name: "loss3/top-1" + type: "Accuracy" + bottom: "fc1" + bottom: "label" + top: "loss3/top-1" + include { + phase: TEST + } +} +layer { + name: "loss3/top-5" + type: "Accuracy" + bottom: "fc1" + bottom: "label" + 
top: "loss3/top-5" + include { + phase: TEST + } + accuracy_param { + top_k: 5 + } +} \ No newline at end of file diff --git a/models/intel_optimized_models/googlenet_v3/mkl/nodata_solver.prototxt b/models/intel_optimized_models/googlenet_v3/mkl/nodata_solver.prototxt new file mode 100644 index 000000000..ab76852c9 --- /dev/null +++ b/models/intel_optimized_models/googlenet_v3/mkl/nodata_solver.prototxt @@ -0,0 +1,16 @@ +net: "models/intel_optimized_models/googlenet_v3/mkl/nodata_train_val.prototxt" +test_iter: 1000 +test_interval: 10000 +test_initialization: false +display: 40 +average_loss: 40 +base_lr: 0.045 +lr_policy: "step" +stepsize: 6400 +gamma: 0.96 +max_iter: 1200000 +momentum: 0.9 +weight_decay: 0.0002 +snapshot: 50000 +snapshot_prefix: "models/intel_optimized_models/googlenet_v3/mkl/googlenet_v3_mkl_nodata" +solver_mode: CPU diff --git a/models/intel_optimized_models/googlenet_v3/mkl/nodata_train_val.prototxt b/models/intel_optimized_models/googlenet_v3/mkl/nodata_train_val.prototxt new file mode 100644 index 000000000..57ba235ac --- /dev/null +++ b/models/intel_optimized_models/googlenet_v3/mkl/nodata_train_val.prototxt @@ -0,0 +1,3860 @@ +name: "InceptionV3" + +layer { + name: "data" + type: "DummyData" + top: "data" + include { + phase: TRAIN + } + dummy_data_param { + shape: { dim: 22 dim: 3 dim: 299 dim: 299 } + data_filler { + type: "constant" + value: 0.01 + } + } +} +layer { + name: "data" + type: "DummyData" + top: "label" + include { + phase: TRAIN + } + dummy_data_param { + shape: { dim: 22 } + data_filler { + type: "constant" + } + } +} +#-------------------- + + +layer { + name: "conv_conv2d" + type: "Convolution" + bottom: "data" + top: "conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 32 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "conv_batchnorm" + type: "BatchNorm" + bottom: "conv_conv2d" + top: 
"conv_conv2d_bn" + + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "conv_relu" + type: "ReLU" + bottom: "conv_conv2d_bn" + top: "conv_conv2d_relu" +} +layer { + name: "conv_1_1_conv2d" + type: "Convolution" + bottom: "conv_conv2d_relu" + top: "conv_1_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 32 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "conv_1_1_batchnorm" + type: "BatchNorm" + bottom: "conv_1_1_conv2d" + top: "conv_1_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "conv_1_1_relu" + type: "ReLU" + bottom: "conv_1_1_conv2d_bn" + top: "conv_1_1_conv2d_relu" +} +layer { + name: "conv_2_2_conv2d" + type: "Convolution" + bottom: "conv_1_1_conv2d_relu" + top: "conv_2_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "conv_2_2_batchnorm" + type: "BatchNorm" + bottom: "conv_2_2_conv2d" + top: "conv_2_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "conv_2_2_relu" + type: "ReLU" + bottom: "conv_2_2_conv2d_bn" + top: "conv_2_2_conv2d_relu" +} +layer { + name: "pool" + type: "Pooling" + bottom: "conv_2_2_conv2d_relu" + top: "pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 0 + } +} +layer { + name: "conv_3_3_conv2d" + type: "Convolution" + bottom: "pool" + top: "conv_3_3_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 80 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "conv_3_3_batchnorm" + type: "BatchNorm" + bottom: "conv_3_3_conv2d" + top: "conv_3_3_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "conv_3_3_relu" 
+ type: "ReLU" + bottom: "conv_3_3_conv2d_bn" + top: "conv_3_3_conv2d_relu" +} +layer { + name: "conv_4_4_conv2d" + type: "Convolution" + bottom: "conv_3_3_conv2d_relu" + top: "conv_4_4_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "conv_4_4_batchnorm" + type: "BatchNorm" + bottom: "conv_4_4_conv2d" + top: "conv_4_4_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "conv_4_4_relu" + type: "ReLU" + bottom: "conv_4_4_conv2d_bn" + top: "conv_4_4_conv2d_relu" +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv_4_4_conv2d_relu" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 0 + } +} +layer { + name: "mixed_conv_conv2d" + type: "Convolution" + bottom: "pool1" + top: "mixed_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_conv_conv2d" + top: "mixed_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_conv_relu" + type: "ReLU" + bottom: "mixed_conv_conv2d_bn" + top: "mixed_conv_conv2d_relu" +} +layer { + name: "mixed_tower_conv_conv2d" + type: "Convolution" + bottom: "pool1" + top: "mixed_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 48 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_tower_conv_conv2d" + top: "mixed_tower_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_tower_conv_relu" + type: "ReLU" + bottom: 
"mixed_tower_conv_conv2d_bn" + top: "mixed_tower_conv_conv2d_relu" +} +layer { + name: "mixed_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_tower_conv_conv2d_relu" + top: "mixed_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_tower_conv_1_conv2d" + top: "mixed_tower_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_tower_conv_1_conv2d_bn" + top: "mixed_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_tower_1_conv_conv2d" + type: "Convolution" + bottom: "pool1" + top: "mixed_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_tower_1_conv_conv2d" + top: "mixed_tower_1_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_tower_1_conv_conv2d_bn" + top: "mixed_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_tower_1_conv_conv2d_relu" + top: "mixed_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_tower_1_conv_1_conv2d" + top: "mixed_tower_1_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_tower_1_conv_1_relu" + 
type: "ReLU" + bottom: "mixed_tower_1_conv_1_conv2d_bn" + top: "mixed_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_tower_1_conv_1_conv2d_relu" + top: "mixed_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_tower_1_conv_2_conv2d" + top: "mixed_tower_1_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_tower_1_conv_2_conv2d_bn" + top: "mixed_tower_1_conv_2_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_pool" + type: "Pooling" + bottom: "pool1" + top: "AVE_pool_mixed_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_pool" + top: "mixed_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 32 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_tower_2_conv_conv2d" + top: "mixed_tower_2_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_tower_2_conv_conv2d_bn" + top: "mixed_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_chconcat" + type: "Concat" + bottom: "mixed_conv_conv2d_relu" + bottom: "mixed_tower_conv_1_conv2d_relu" + bottom: "mixed_tower_1_conv_2_conv2d_relu" + bottom: "mixed_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_1_conv_conv2d" + type: 
"Convolution" + bottom: "ch_concat_mixed_chconcat" + top: "mixed_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_conv_conv2d" + top: "mixed_1_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_1_conv_relu" + type: "ReLU" + bottom: "mixed_1_conv_conv2d_bn" + top: "mixed_1_conv_conv2d_relu" +} +layer { + name: "mixed_1_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_chconcat" + top: "mixed_1_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 48 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_tower_conv_conv2d" + top: "mixed_1_tower_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_1_tower_conv_relu" + type: "ReLU" + bottom: "mixed_1_tower_conv_conv2d_bn" + top: "mixed_1_tower_conv_conv2d_relu" +} +layer { + name: "mixed_1_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_1_tower_conv_conv2d_relu" + top: "mixed_1_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_tower_conv_1_conv2d" + top: "mixed_1_tower_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_1_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_1_tower_conv_1_conv2d_bn" + top: "mixed_1_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_1_tower_1_conv_conv2d" + 
type: "Convolution" + bottom: "ch_concat_mixed_chconcat" + top: "mixed_1_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_tower_1_conv_conv2d" + top: "mixed_1_tower_1_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_1_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_1_tower_1_conv_conv2d_bn" + top: "mixed_1_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_1_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_1_tower_1_conv_conv2d_relu" + top: "mixed_1_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_tower_1_conv_1_conv2d" + top: "mixed_1_tower_1_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_1_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_1_tower_1_conv_1_conv2d_bn" + top: "mixed_1_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_1_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_1_tower_1_conv_1_conv2d_relu" + top: "mixed_1_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_tower_1_conv_2_conv2d" + top: "mixed_1_tower_1_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_1_tower_1_conv_2_relu" + type: "ReLU" + bottom: 
"mixed_1_tower_1_conv_2_conv2d_bn" + top: "mixed_1_tower_1_conv_2_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_1_pool" + type: "Pooling" + bottom: "ch_concat_mixed_chconcat" + top: "AVE_pool_mixed_1_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_1_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_1_pool" + top: "mixed_1_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_tower_2_conv_conv2d" + top: "mixed_1_tower_2_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_1_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_1_tower_2_conv_conv2d_bn" + top: "mixed_1_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_1_chconcat" + type: "Concat" + bottom: "mixed_1_conv_conv2d_relu" + bottom: "mixed_1_tower_conv_1_conv2d_relu" + bottom: "mixed_1_tower_1_conv_2_conv2d_relu" + bottom: "mixed_1_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_1_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_2_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_1_chconcat" + top: "mixed_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_conv_conv2d" + top: "mixed_2_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_2_conv_relu" + type: "ReLU" + bottom: "mixed_2_conv_conv2d_bn" + top: "mixed_2_conv_conv2d_relu" +} +layer { + name: "mixed_2_tower_conv_conv2d" + type: "Convolution" + bottom: 
"ch_concat_mixed_1_chconcat" + top: "mixed_2_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 48 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_conv_conv2d" + top: "mixed_2_tower_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_2_tower_conv_relu" + type: "ReLU" + bottom: "mixed_2_tower_conv_conv2d_bn" + top: "mixed_2_tower_conv_conv2d_relu" +} +layer { + name: "mixed_2_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_2_tower_conv_conv2d_relu" + top: "mixed_2_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_conv_1_conv2d" + top: "mixed_2_tower_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_2_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_2_tower_conv_1_conv2d_bn" + top: "mixed_2_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_2_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_1_chconcat" + top: "mixed_2_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_1_conv_conv2d" + top: "mixed_2_tower_1_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_2_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_2_tower_1_conv_conv2d_bn" + top: "mixed_2_tower_1_conv_conv2d_relu" +} +layer { + 
name: "mixed_2_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_2_tower_1_conv_conv2d_relu" + top: "mixed_2_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_1_conv_1_conv2d" + top: "mixed_2_tower_1_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_2_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_2_tower_1_conv_1_conv2d_bn" + top: "mixed_2_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_2_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_2_tower_1_conv_1_conv2d_relu" + top: "mixed_2_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_1_conv_2_conv2d" + top: "mixed_2_tower_1_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_2_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_2_tower_1_conv_2_conv2d_bn" + top: "mixed_2_tower_1_conv_2_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_2_pool" + type: "Pooling" + bottom: "ch_concat_mixed_1_chconcat" + top: "AVE_pool_mixed_2_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_2_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_2_pool" + top: "mixed_2_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: 
"mixed_2_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_2_conv_conv2d" + top: "mixed_2_tower_2_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_2_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_2_tower_2_conv_conv2d_bn" + top: "mixed_2_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_2_chconcat" + type: "Concat" + bottom: "mixed_2_conv_conv2d_relu" + bottom: "mixed_2_tower_conv_1_conv2d_relu" + bottom: "mixed_2_tower_1_conv_2_conv2d_relu" + bottom: "mixed_2_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_2_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_3_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_2_chconcat" + top: "mixed_3_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_3_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_3_conv_conv2d" + top: "mixed_3_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_3_conv_relu" + type: "ReLU" + bottom: "mixed_3_conv_conv2d_bn" + top: "mixed_3_conv_conv2d_relu" +} +layer { + name: "mixed_3_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_2_chconcat" + top: "mixed_3_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_3_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_3_tower_conv_conv2d" + top: "mixed_3_tower_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_3_tower_conv_relu" + type: "ReLU" + bottom: "mixed_3_tower_conv_conv2d_bn" + top: "mixed_3_tower_conv_conv2d_relu" +} +layer { + name: "mixed_3_tower_conv_1_conv2d" + type: 
"Convolution" + bottom: "mixed_3_tower_conv_conv2d_relu" + top: "mixed_3_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_3_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_3_tower_conv_1_conv2d" + top: "mixed_3_tower_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_3_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_3_tower_conv_1_conv2d_bn" + top: "mixed_3_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_3_tower_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_3_tower_conv_1_conv2d_relu" + top: "mixed_3_tower_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_3_tower_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_3_tower_conv_2_conv2d" + top: "mixed_3_tower_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_3_tower_conv_2_relu" + type: "ReLU" + bottom: "mixed_3_tower_conv_2_conv2d_bn" + top: "mixed_3_tower_conv_2_conv2d_relu" +} +layer { + name: "max_pool_mixed_3_pool" + type: "Pooling" + bottom: "ch_concat_mixed_2_chconcat" + top: "max_pool_mixed_3_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 0 + } +} +layer { + name: "ch_concat_mixed_3_chconcat" + type: "Concat" + bottom: "max_pool_mixed_3_pool" + bottom: "mixed_3_conv_conv2d_relu" + bottom: "mixed_3_tower_conv_2_conv2d_relu" + top: "ch_concat_mixed_3_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_4_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_3_chconcat" + top: "mixed_4_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + 
num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_4_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_conv_conv2d" + top: "mixed_4_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_4_conv_relu" + type: "ReLU" + bottom: "mixed_4_conv_conv2d_bn" + top: "mixed_4_conv_conv2d_relu" +} +layer { + name: "mixed_4_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_3_chconcat" + top: "mixed_4_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_4_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_conv_conv2d" + top: "mixed_4_tower_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_4_tower_conv_relu" + type: "ReLU" + bottom: "mixed_4_tower_conv_conv2d_bn" + top: "mixed_4_tower_conv_conv2d_relu" +} +layer { + name: "mixed_4_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_4_tower_conv_conv2d_relu" + top: "mixed_4_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_4_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_conv_1_conv2d" + top: "mixed_4_tower_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_4_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_4_tower_conv_1_conv2d_bn" + top: "mixed_4_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_4_tower_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_4_tower_conv_1_conv2d_relu" + top: "mixed_4_tower_conv_2_conv2d" + param { + lr_mult: 
1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_4_tower_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_conv_2_conv2d" + top: "mixed_4_tower_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_4_tower_conv_2_relu" + type: "ReLU" + bottom: "mixed_4_tower_conv_2_conv2d_bn" + top: "mixed_4_tower_conv_2_conv2d_relu" +} +layer { + name: "mixed_4_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_3_chconcat" + top: "mixed_4_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_4_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_1_conv_conv2d" + top: "mixed_4_tower_1_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_4_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_4_tower_1_conv_conv2d_bn" + top: "mixed_4_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_4_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_4_tower_1_conv_conv2d_relu" + top: "mixed_4_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_4_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_1_conv_1_conv2d" + top: "mixed_4_tower_1_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_4_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_4_tower_1_conv_1_conv2d_bn" + top: "mixed_4_tower_1_conv_1_conv2d_relu" +} +layer { + name: 
"mixed_4_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_4_tower_1_conv_1_conv2d_relu" + top: "mixed_4_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_4_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_1_conv_2_conv2d" + top: "mixed_4_tower_1_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_4_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_4_tower_1_conv_2_conv2d_bn" + top: "mixed_4_tower_1_conv_2_conv2d_relu" +} +layer { + name: "mixed_4_tower_1_conv_3_conv2d" + type: "Convolution" + bottom: "mixed_4_tower_1_conv_2_conv2d_relu" + top: "mixed_4_tower_1_conv_3_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_4_tower_1_conv_3_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_1_conv_3_conv2d" + top: "mixed_4_tower_1_conv_3_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_4_tower_1_conv_3_relu" + type: "ReLU" + bottom: "mixed_4_tower_1_conv_3_conv2d_bn" + top: "mixed_4_tower_1_conv_3_conv2d_relu" +} +layer { + name: "mixed_4_tower_1_conv_4_conv2d" + type: "Convolution" + bottom: "mixed_4_tower_1_conv_3_conv2d_relu" + top: "mixed_4_tower_1_conv_4_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_4_tower_1_conv_4_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_1_conv_4_conv2d" + top: 
"mixed_4_tower_1_conv_4_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_4_tower_1_conv_4_relu" + type: "ReLU" + bottom: "mixed_4_tower_1_conv_4_conv2d_bn" + top: "mixed_4_tower_1_conv_4_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_4_pool" + type: "Pooling" + bottom: "ch_concat_mixed_3_chconcat" + top: "AVE_pool_mixed_4_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_4_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_4_pool" + top: "mixed_4_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_4_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_2_conv_conv2d" + top: "mixed_4_tower_2_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_4_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_4_tower_2_conv_conv2d_bn" + top: "mixed_4_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_4_chconcat" + type: "Concat" + bottom: "mixed_4_conv_conv2d_relu" + bottom: "mixed_4_tower_conv_2_conv2d_relu" + bottom: "mixed_4_tower_1_conv_4_conv2d_relu" + bottom: "mixed_4_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_4_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_5_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_4_chconcat" + top: "mixed_5_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_5_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_conv_conv2d" + top: "mixed_5_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_5_conv_relu" + type: "ReLU" + 
bottom: "mixed_5_conv_conv2d_bn" + top: "mixed_5_conv_conv2d_relu" +} +layer { + name: "mixed_5_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_4_chconcat" + top: "mixed_5_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_5_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_conv_conv2d" + top: "mixed_5_tower_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_5_tower_conv_relu" + type: "ReLU" + bottom: "mixed_5_tower_conv_conv2d_bn" + top: "mixed_5_tower_conv_conv2d_relu" +} +layer { + name: "mixed_5_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_5_tower_conv_conv2d_relu" + top: "mixed_5_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_5_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_conv_1_conv2d" + top: "mixed_5_tower_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_5_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_5_tower_conv_1_conv2d_bn" + top: "mixed_5_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_5_tower_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_5_tower_conv_1_conv2d_relu" + top: "mixed_5_tower_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_5_tower_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_conv_2_conv2d" + top: "mixed_5_tower_conv_2_conv2d_bn" + 
batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_5_tower_conv_2_relu" + type: "ReLU" + bottom: "mixed_5_tower_conv_2_conv2d_bn" + top: "mixed_5_tower_conv_2_conv2d_relu" +} +layer { + name: "mixed_5_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_4_chconcat" + top: "mixed_5_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_5_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_1_conv_conv2d" + top: "mixed_5_tower_1_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_5_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_5_tower_1_conv_conv2d_bn" + top: "mixed_5_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_5_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_5_tower_1_conv_conv2d_relu" + top: "mixed_5_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_5_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_1_conv_1_conv2d" + top: "mixed_5_tower_1_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_5_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_5_tower_1_conv_1_conv2d_bn" + top: "mixed_5_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_5_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_5_tower_1_conv_1_conv2d_relu" + top: "mixed_5_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 
7 + } +} +layer { + name: "mixed_5_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_1_conv_2_conv2d" + top: "mixed_5_tower_1_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_5_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_5_tower_1_conv_2_conv2d_bn" + top: "mixed_5_tower_1_conv_2_conv2d_relu" +} +layer { + name: "mixed_5_tower_1_conv_3_conv2d" + type: "Convolution" + bottom: "mixed_5_tower_1_conv_2_conv2d_relu" + top: "mixed_5_tower_1_conv_3_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_5_tower_1_conv_3_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_1_conv_3_conv2d" + top: "mixed_5_tower_1_conv_3_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_5_tower_1_conv_3_relu" + type: "ReLU" + bottom: "mixed_5_tower_1_conv_3_conv2d_bn" + top: "mixed_5_tower_1_conv_3_conv2d_relu" +} +layer { + name: "mixed_5_tower_1_conv_4_conv2d" + type: "Convolution" + bottom: "mixed_5_tower_1_conv_3_conv2d_relu" + top: "mixed_5_tower_1_conv_4_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_5_tower_1_conv_4_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_1_conv_4_conv2d" + top: "mixed_5_tower_1_conv_4_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_5_tower_1_conv_4_relu" + type: "ReLU" + bottom: "mixed_5_tower_1_conv_4_conv2d_bn" + top: "mixed_5_tower_1_conv_4_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_5_pool" + type: "Pooling" + bottom: "ch_concat_mixed_4_chconcat" + top: "AVE_pool_mixed_5_pool" + pooling_param { + 
pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_5_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_5_pool" + top: "mixed_5_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_5_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_2_conv_conv2d" + top: "mixed_5_tower_2_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_5_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_5_tower_2_conv_conv2d_bn" + top: "mixed_5_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_5_chconcat" + type: "Concat" + bottom: "mixed_5_conv_conv2d_relu" + bottom: "mixed_5_tower_conv_2_conv2d_relu" + bottom: "mixed_5_tower_1_conv_4_conv2d_relu" + bottom: "mixed_5_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_5_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_6_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_5_chconcat" + top: "mixed_6_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_6_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_conv_conv2d" + top: "mixed_6_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_6_conv_relu" + type: "ReLU" + bottom: "mixed_6_conv_conv2d_bn" + top: "mixed_6_conv_conv2d_relu" +} +layer { + name: "mixed_6_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_5_chconcat" + top: "mixed_6_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: 
"gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_6_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_conv_conv2d" + top: "mixed_6_tower_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_6_tower_conv_relu" + type: "ReLU" + bottom: "mixed_6_tower_conv_conv2d_bn" + top: "mixed_6_tower_conv_conv2d_relu" +} +layer { + name: "mixed_6_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_6_tower_conv_conv2d_relu" + top: "mixed_6_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_6_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_conv_1_conv2d" + top: "mixed_6_tower_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_6_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_6_tower_conv_1_conv2d_bn" + top: "mixed_6_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_6_tower_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_6_tower_conv_1_conv2d_relu" + top: "mixed_6_tower_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_6_tower_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_conv_2_conv2d" + top: "mixed_6_tower_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_6_tower_conv_2_relu" + type: "ReLU" + bottom: "mixed_6_tower_conv_2_conv2d_bn" + top: "mixed_6_tower_conv_2_conv2d_relu" +} +layer { + name: "mixed_6_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_5_chconcat" + top: "mixed_6_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + 
} + convolution_param { + num_output: 160 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_6_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_1_conv_conv2d" + top: "mixed_6_tower_1_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_6_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_6_tower_1_conv_conv2d_bn" + top: "mixed_6_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_6_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_6_tower_1_conv_conv2d_relu" + top: "mixed_6_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_6_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_1_conv_1_conv2d" + top: "mixed_6_tower_1_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_6_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_6_tower_1_conv_1_conv2d_bn" + top: "mixed_6_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_6_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_6_tower_1_conv_1_conv2d_relu" + top: "mixed_6_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_6_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_1_conv_2_conv2d" + top: "mixed_6_tower_1_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_6_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_6_tower_1_conv_2_conv2d_bn" + top: "mixed_6_tower_1_conv_2_conv2d_relu" +} +layer { + name: 
"mixed_6_tower_1_conv_3_conv2d" + type: "Convolution" + bottom: "mixed_6_tower_1_conv_2_conv2d_relu" + top: "mixed_6_tower_1_conv_3_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_6_tower_1_conv_3_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_1_conv_3_conv2d" + top: "mixed_6_tower_1_conv_3_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_6_tower_1_conv_3_relu" + type: "ReLU" + bottom: "mixed_6_tower_1_conv_3_conv2d_bn" + top: "mixed_6_tower_1_conv_3_conv2d_relu" +} +layer { + name: "mixed_6_tower_1_conv_4_conv2d" + type: "Convolution" + bottom: "mixed_6_tower_1_conv_3_conv2d_relu" + top: "mixed_6_tower_1_conv_4_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_6_tower_1_conv_4_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_1_conv_4_conv2d" + top: "mixed_6_tower_1_conv_4_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_6_tower_1_conv_4_relu" + type: "ReLU" + bottom: "mixed_6_tower_1_conv_4_conv2d_bn" + top: "mixed_6_tower_1_conv_4_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_6_pool" + type: "Pooling" + bottom: "ch_concat_mixed_5_chconcat" + top: "AVE_pool_mixed_6_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_6_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_6_pool" + top: "mixed_6_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 
0.01 + } + } +} +layer { + name: "mixed_6_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_2_conv_conv2d" + top: "mixed_6_tower_2_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_6_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_6_tower_2_conv_conv2d_bn" + top: "mixed_6_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_6_chconcat" + type: "Concat" + bottom: "mixed_6_conv_conv2d_relu" + bottom: "mixed_6_tower_conv_2_conv2d_relu" + bottom: "mixed_6_tower_1_conv_4_conv2d_relu" + bottom: "mixed_6_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_6_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_7_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_6_chconcat" + top: "mixed_7_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_7_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_conv_conv2d" + top: "mixed_7_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_7_conv_relu" + type: "ReLU" + bottom: "mixed_7_conv_conv2d_bn" + top: "mixed_7_conv_conv2d_relu" +} +layer { + name: "mixed_7_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_6_chconcat" + top: "mixed_7_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_7_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_conv_conv2d" + top: "mixed_7_tower_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_7_tower_conv_relu" + type: "ReLU" + bottom: "mixed_7_tower_conv_conv2d_bn" + top: "mixed_7_tower_conv_conv2d_relu" +} +layer { + name: 
"mixed_7_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_7_tower_conv_conv2d_relu" + top: "mixed_7_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_7_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_conv_1_conv2d" + top: "mixed_7_tower_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_7_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_7_tower_conv_1_conv2d_bn" + top: "mixed_7_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_7_tower_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_7_tower_conv_1_conv2d_relu" + top: "mixed_7_tower_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_7_tower_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_conv_2_conv2d" + top: "mixed_7_tower_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_7_tower_conv_2_relu" + type: "ReLU" + bottom: "mixed_7_tower_conv_2_conv2d_bn" + top: "mixed_7_tower_conv_2_conv2d_relu" +} +layer { + name: "mixed_7_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_6_chconcat" + top: "mixed_7_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_7_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_1_conv_conv2d" + top: "mixed_7_tower_1_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: 
"mixed_7_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_7_tower_1_conv_conv2d_bn" + top: "mixed_7_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_7_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_7_tower_1_conv_conv2d_relu" + top: "mixed_7_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_7_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_1_conv_1_conv2d" + top: "mixed_7_tower_1_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_7_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_7_tower_1_conv_1_conv2d_bn" + top: "mixed_7_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_7_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_7_tower_1_conv_1_conv2d_relu" + top: "mixed_7_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_7_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_1_conv_2_conv2d" + top: "mixed_7_tower_1_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_7_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_7_tower_1_conv_2_conv2d_bn" + top: "mixed_7_tower_1_conv_2_conv2d_relu" +} +layer { + name: "mixed_7_tower_1_conv_3_conv2d" + type: "Convolution" + bottom: "mixed_7_tower_1_conv_2_conv2d_relu" + top: "mixed_7_tower_1_conv_3_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} 
+layer { + name: "mixed_7_tower_1_conv_3_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_1_conv_3_conv2d" + top: "mixed_7_tower_1_conv_3_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_7_tower_1_conv_3_relu" + type: "ReLU" + bottom: "mixed_7_tower_1_conv_3_conv2d_bn" + top: "mixed_7_tower_1_conv_3_conv2d_relu" +} +layer { + name: "mixed_7_tower_1_conv_4_conv2d" + type: "Convolution" + bottom: "mixed_7_tower_1_conv_3_conv2d_relu" + top: "mixed_7_tower_1_conv_4_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_7_tower_1_conv_4_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_1_conv_4_conv2d" + top: "mixed_7_tower_1_conv_4_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_7_tower_1_conv_4_relu" + type: "ReLU" + bottom: "mixed_7_tower_1_conv_4_conv2d_bn" + top: "mixed_7_tower_1_conv_4_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_7_pool" + type: "Pooling" + bottom: "ch_concat_mixed_6_chconcat" + top: "AVE_pool_mixed_7_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_7_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_7_pool" + top: "mixed_7_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_7_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_2_conv_conv2d" + top: "mixed_7_tower_2_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_7_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_7_tower_2_conv_conv2d_bn" + top: "mixed_7_tower_2_conv_conv2d_relu" +} +layer { + 
name: "ch_concat_mixed_7_chconcat" + type: "Concat" + bottom: "mixed_7_conv_conv2d_relu" + bottom: "mixed_7_tower_conv_2_conv2d_relu" + bottom: "mixed_7_tower_1_conv_4_conv2d_relu" + bottom: "mixed_7_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_7_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_8_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_7_chconcat" + top: "mixed_8_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_8_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_8_tower_conv_conv2d" + top: "mixed_8_tower_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_8_tower_conv_relu" + type: "ReLU" + bottom: "mixed_8_tower_conv_conv2d_bn" + top: "mixed_8_tower_conv_conv2d_relu" +} +layer { + name: "mixed_8_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_8_tower_conv_conv2d_relu" + top: "mixed_8_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 320 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_8_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_8_tower_conv_1_conv2d" + top: "mixed_8_tower_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_8_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_8_tower_conv_1_conv2d_bn" + top: "mixed_8_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_8_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_7_chconcat" + top: "mixed_8_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + 
std: 0.01 + } + } +} +layer { + name: "mixed_8_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_8_tower_1_conv_conv2d" + top: "mixed_8_tower_1_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_8_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_8_tower_1_conv_conv2d_bn" + top: "mixed_8_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_8_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_8_tower_1_conv_conv2d_relu" + top: "mixed_8_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_8_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_8_tower_1_conv_1_conv2d" + top: "mixed_8_tower_1_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_8_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_8_tower_1_conv_1_conv2d_bn" + top: "mixed_8_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_8_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_8_tower_1_conv_1_conv2d_relu" + top: "mixed_8_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_8_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_8_tower_1_conv_2_conv2d" + top: "mixed_8_tower_1_conv_2_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_8_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_8_tower_1_conv_2_conv2d_bn" + top: "mixed_8_tower_1_conv_2_conv2d_relu" +} +layer { + name: "mixed_8_tower_1_conv_3_conv2d" + type: "Convolution" + bottom: "mixed_8_tower_1_conv_2_conv2d_relu" + top: 
"mixed_8_tower_1_conv_3_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_8_tower_1_conv_3_batchnorm" + type: "BatchNorm" + bottom: "mixed_8_tower_1_conv_3_conv2d" + top: "mixed_8_tower_1_conv_3_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_8_tower_1_conv_3_relu" + type: "ReLU" + bottom: "mixed_8_tower_1_conv_3_conv2d_bn" + top: "mixed_8_tower_1_conv_3_conv2d_relu" +} +layer { + name: "MAX_pool_mixed_8_pool" + type: "Pooling" + bottom: "ch_concat_mixed_7_chconcat" + top: "MAX_pool_mixed_8_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 0 + } +} +layer { + name: "ch_concat_mixed_8_chconcat" + type: "Concat" + bottom: "mixed_8_tower_conv_1_conv2d_relu" + bottom: "mixed_8_tower_1_conv_3_conv2d_relu" + bottom: "MAX_pool_mixed_8_pool" + top: "ch_concat_mixed_8_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_9_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_8_chconcat" + top: "mixed_9_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 320 + bias_term: false + pad: 0 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + kernel_h: 1 + kernel_w: 1 + } +} +layer { + name: "mixed_9_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_conv_conv2d" + top: "mixed_9_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_9_conv_relu" + type: "ReLU" + bottom: "mixed_9_conv_conv2d_bn" + top: "mixed_9_conv_conv2d_relu" +} +layer { + name: "mixed_9_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_8_chconcat" + top: "mixed_9_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + 
type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_9_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_conv_conv2d" + top: "mixed_9_tower_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_9_tower_conv_relu" + type: "ReLU" + bottom: "mixed_9_tower_conv_conv2d_bn" + top: "mixed_9_tower_conv_conv2d_relu" +} +layer { + name: "mixed_9_tower_mixed_conv_conv2d" + type: "Convolution" + bottom: "mixed_9_tower_conv_conv2d_relu" + top: "mixed_9_tower_mixed_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 1 + kernel_h: 1 + kernel_w: 3 + } +} +layer { + name: "mixed_9_tower_mixed_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_mixed_conv_conv2d" + top: "mixed_9_tower_mixed_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_9_tower_mixed_conv_relu" + type: "ReLU" + bottom: "mixed_9_tower_mixed_conv_conv2d_bn" + top: "mixed_9_tower_mixed_conv_conv2d_relu" +} +layer { + name: "mixed_9_tower_mixed_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_9_tower_conv_conv2d_relu" + top: "mixed_9_tower_mixed_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 1 + pad_w: 0 + kernel_h: 3 + kernel_w: 1 + } +} +layer { + name: "mixed_9_tower_mixed_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_mixed_conv_1_conv2d" + top: "mixed_9_tower_mixed_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_9_tower_mixed_conv_1_relu" + type: "ReLU" + bottom: "mixed_9_tower_mixed_conv_1_conv2d_bn" + top: "mixed_9_tower_mixed_conv_1_conv2d_relu" +} +layer { + name: "mixed_9_tower_1_conv_conv2d" + type: "Convolution" + bottom: 
"ch_concat_mixed_8_chconcat" + top: "mixed_9_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 448 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_9_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_1_conv_conv2d" + top: "mixed_9_tower_1_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_9_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_9_tower_1_conv_conv2d_bn" + top: "mixed_9_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_9_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_9_tower_1_conv_conv2d_relu" + top: "mixed_9_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_9_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_1_conv_1_conv2d" + top: "mixed_9_tower_1_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_9_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_9_tower_1_conv_1_conv2d_bn" + top: "mixed_9_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_9_tower_1_mixed_conv_conv2d" + type: "Convolution" + bottom: "mixed_9_tower_1_conv_1_conv2d_relu" + top: "mixed_9_tower_1_mixed_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 1 + kernel_h: 1 + kernel_w: 3 + } +} +layer { + name: "mixed_9_tower_1_mixed_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_1_mixed_conv_conv2d" + top: "mixed_9_tower_1_mixed_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_9_tower_1_mixed_conv_relu" + type: 
"ReLU" + bottom: "mixed_9_tower_1_mixed_conv_conv2d_bn" + top: "mixed_9_tower_1_mixed_conv_conv2d_relu" +} +layer { + name: "mixed_9_tower_1_mixed_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_9_tower_1_conv_1_conv2d_relu" + top: "mixed_9_tower_1_mixed_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 1 + pad_w: 0 + kernel_h: 3 + kernel_w: 1 + } +} +layer { + name: "mixed_9_tower_1_mixed_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_1_mixed_conv_1_conv2d" + top: "mixed_9_tower_1_mixed_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_9_tower_1_mixed_conv_1_relu" + type: "ReLU" + bottom: "mixed_9_tower_1_mixed_conv_1_conv2d_bn" + top: "mixed_9_tower_1_mixed_conv_1_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_9_pool" + type: "Pooling" + bottom: "ch_concat_mixed_8_chconcat" + top: "AVE_pool_mixed_9_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_9_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_9_pool" + top: "mixed_9_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_9_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_2_conv_conv2d" + top: "mixed_9_tower_2_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_9_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_9_tower_2_conv_conv2d_bn" + top: "mixed_9_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_9_chconcat" + type: "Concat" + bottom: "mixed_9_conv_conv2d_relu" + bottom: "mixed_9_tower_mixed_conv_conv2d_relu" + bottom: "mixed_9_tower_mixed_conv_1_conv2d_relu" + bottom: 
"mixed_9_tower_1_mixed_conv_conv2d_relu" + bottom: "mixed_9_tower_1_mixed_conv_1_conv2d_relu" + bottom: "mixed_9_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_9_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_10_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_9_chconcat" + top: "mixed_10_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 320 + bias_term: false + pad: 0 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + kernel_h: 1 + kernel_w: 1 + } +} +layer { + name: "mixed_10_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_conv_conv2d" + top: "mixed_10_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_10_conv_relu" + type: "ReLU" + bottom: "mixed_10_conv_conv2d_bn" + top: "mixed_10_conv_conv2d_relu" +} +layer { + name: "mixed_10_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_9_chconcat" + top: "mixed_10_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_10_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_conv_conv2d" + top: "mixed_10_tower_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_10_tower_conv_relu" + type: "ReLU" + bottom: "mixed_10_tower_conv_conv2d_bn" + top: "mixed_10_tower_conv_conv2d_relu" +} +layer { + name: "mixed_10_tower_mixed_conv_conv2d" + type: "Convolution" + bottom: "mixed_10_tower_conv_conv2d_relu" + top: "mixed_10_tower_mixed_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 1 + kernel_h: 1 + kernel_w: 3 + } +} +layer { + name: "mixed_10_tower_mixed_conv_batchnorm" + type: 
"BatchNorm" + bottom: "mixed_10_tower_mixed_conv_conv2d" + top: "mixed_10_tower_mixed_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_10_tower_mixed_conv_relu" + type: "ReLU" + bottom: "mixed_10_tower_mixed_conv_conv2d_bn" + top: "mixed_10_tower_mixed_conv_conv2d_relu" +} +layer { + name: "mixed_10_tower_mixed_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_10_tower_conv_conv2d_relu" + top: "mixed_10_tower_mixed_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 1 + pad_w: 0 + kernel_h: 3 + kernel_w: 1 + } +} +layer { + name: "mixed_10_tower_mixed_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_mixed_conv_1_conv2d" + top: "mixed_10_tower_mixed_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_10_tower_mixed_conv_1_relu" + type: "ReLU" + bottom: "mixed_10_tower_mixed_conv_1_conv2d_bn" + top: "mixed_10_tower_mixed_conv_1_conv2d_relu" +} +layer { + name: "mixed_10_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_9_chconcat" + top: "mixed_10_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 448 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_10_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_1_conv_conv2d" + top: "mixed_10_tower_1_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_10_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_10_tower_1_conv_conv2d_bn" + top: "mixed_10_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_10_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_10_tower_1_conv_conv2d_relu" + top: "mixed_10_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + 
convolution_param { + num_output: 384 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_10_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_1_conv_1_conv2d" + top: "mixed_10_tower_1_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_10_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_10_tower_1_conv_1_conv2d_bn" + top: "mixed_10_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_10_tower_1_mixed_conv_conv2d" + type: "Convolution" + bottom: "mixed_10_tower_1_conv_1_conv2d_relu" + top: "mixed_10_tower_1_mixed_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 1 + kernel_h: 1 + kernel_w: 3 + } +} +layer { + name: "mixed_10_tower_1_mixed_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_1_mixed_conv_conv2d" + top: "mixed_10_tower_1_mixed_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_10_tower_1_mixed_conv_relu" + type: "ReLU" + bottom: "mixed_10_tower_1_mixed_conv_conv2d_bn" + top: "mixed_10_tower_1_mixed_conv_conv2d_relu" +} +layer { + name: "mixed_10_tower_1_mixed_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_10_tower_1_conv_1_conv2d_relu" + top: "mixed_10_tower_1_mixed_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 1 + pad_w: 0 + kernel_h: 3 + kernel_w: 1 + } +} +layer { + name: "mixed_10_tower_1_mixed_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_1_mixed_conv_1_conv2d" + top: "mixed_10_tower_1_mixed_conv_1_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_10_tower_1_mixed_conv_1_relu" + type: "ReLU" + 
bottom: "mixed_10_tower_1_mixed_conv_1_conv2d_bn" + top: "mixed_10_tower_1_mixed_conv_1_conv2d_relu" +} +layer { + name: "MAX_pool_mixed_10_pool" + type: "Pooling" + bottom: "ch_concat_mixed_9_chconcat" + top: "MAX_pool_mixed_10_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_10_tower_2_conv_conv2d" + type: "Convolution" + bottom: "MAX_pool_mixed_10_pool" + top: "mixed_10_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_10_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_2_conv_conv2d" + top: "mixed_10_tower_2_conv_conv2d_bn" + batch_norm_param { + engine: MKL2017 + } +} +layer { + name: "mixed_10_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_10_tower_2_conv_conv2d_bn" + top: "mixed_10_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_10_chconcat" + type: "Concat" + bottom: "mixed_10_conv_conv2d_relu" + bottom: "mixed_10_tower_mixed_conv_conv2d_relu" + bottom: "mixed_10_tower_mixed_conv_1_conv2d_relu" + bottom: "mixed_10_tower_1_mixed_conv_conv2d_relu" + bottom: "mixed_10_tower_1_mixed_conv_1_conv2d_relu" + bottom: "mixed_10_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_10_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "global_pool" + type: "Pooling" + bottom: "ch_concat_mixed_10_chconcat" + top: "global_pool" + pooling_param { + pool: AVE + kernel_size: 8 + stride: 1 + pad: 0 + } +} +layer { + name: "drop" + type: "Dropout" + bottom: "global_pool" + top: "global_pool" + dropout_param { + dropout_ratio: 0.8 + } +} +layer { + name: "flatten" + type: "Flatten" + bottom: "global_pool" + top: "flatten" +} +layer { + name: "fc1" + type: "InnerProduct" + bottom: "flatten" + top: "fc1" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + 
decay_mult: 0.0 + } + inner_product_param { + num_output: 1000 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "loss" + type: "SoftmaxWithLoss" + bottom: "fc1" + bottom: "label" + top: "loss" +} +layer { + name: "loss3/top-1" + type: "Accuracy" + bottom: "fc1" + bottom: "label" + top: "loss3/top-1" + include { + phase: TEST + } +} +layer { + name: "loss3/top-5" + type: "Accuracy" + bottom: "fc1" + bottom: "label" + top: "loss3/top-5" + include { + phase: TEST + } + accuracy_param { + top_k: 5 + } +} \ No newline at end of file diff --git a/models/intel_optimized_models/googlenet_v3/mkldnn/lmdb_solver.prototxt b/models/intel_optimized_models/googlenet_v3/mkldnn/lmdb_solver.prototxt new file mode 100644 index 000000000..f113f0855 --- /dev/null +++ b/models/intel_optimized_models/googlenet_v3/mkldnn/lmdb_solver.prototxt @@ -0,0 +1,16 @@ +net: "models/intel_optimized_models/googlenet_v3/mkldnn/lmdb_train_val.prototxt" +test_iter: 1000 +test_interval: 10000 +test_initialization: false +display: 40 +average_loss: 40 +base_lr: 0.045 +lr_policy: "step" +stepsize: 6400 +gamma: 0.96 +max_iter: 1200000 +momentum: 0.9 +weight_decay: 0.0002 +snapshot: 50000 +snapshot_prefix: "models/intel_optimized_models/googlenet_v3/mkldnn/googlenet_v3_mkldnn_lmdb" +solver_mode: CPU diff --git a/models/intel_optimized_models/googlenet_v3/mkldnn/lmdb_train_val.prototxt b/models/intel_optimized_models/googlenet_v3/mkldnn/lmdb_train_val.prototxt new file mode 100644 index 000000000..6d51b7f6b --- /dev/null +++ b/models/intel_optimized_models/googlenet_v3/mkldnn/lmdb_train_val.prototxt @@ -0,0 +1,3594 @@ +name: "InceptionV3" + +layer { + top: "data" + top: "label" + name: "data" + type: "Data" + data_param { + source: "/data/LMDB_300px/ilsvrc12_train_lmdb" + batch_size: 22 + backend: LMDB +# shuffle: true + } + include { + phase: TRAIN + } + transform_param { + mirror: true + crop_size: 299 +# mean_file: 
"data/ilsvrc12/imagenet_mean.binaryproto" + mean_value: 104 + mean_value: 117 + mean_value: 123 + } +} +### Validation Set +layer { + top: "data" + top: "label" + name: "data" + type: "Data" + data_param { + source: "/data/LMDB_300px/ilsvrc12_val_lmdb" + batch_size: 50 + backend: LMDB + } + include { + phase: TEST + } + transform_param { + mirror: false + crop_size: 299 +# mean_file: "data/ilsvrc12/imagenet_mean.binaryproto" + mean_value: 104 + mean_value: 117 + mean_value: 123 + } +} +#-------------------- + + +layer { + name: "conv_conv2d" + type: "Convolution" + bottom: "data" + top: "conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 32 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "conv_batchnorm" + type: "BatchNorm" + bottom: "conv_conv2d" + top: "conv_conv2d_bn" +} +layer { + name: "conv_relu" + type: "ReLU" + bottom: "conv_conv2d_bn" + top: "conv_conv2d_relu" +} +layer { + name: "conv_1_1_conv2d" + type: "Convolution" + bottom: "conv_conv2d_relu" + top: "conv_1_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 32 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "conv_1_1_batchnorm" + type: "BatchNorm" + bottom: "conv_1_1_conv2d" + top: "conv_1_1_conv2d_bn" +} +layer { + name: "conv_1_1_relu" + type: "ReLU" + bottom: "conv_1_1_conv2d_bn" + top: "conv_1_1_conv2d_relu" +} +layer { + name: "conv_2_2_conv2d" + type: "Convolution" + bottom: "conv_1_1_conv2d_relu" + top: "conv_2_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "conv_2_2_batchnorm" + type: "BatchNorm" + bottom: "conv_2_2_conv2d" + top: "conv_2_2_conv2d_bn" +} 
+layer { + name: "conv_2_2_relu" + type: "ReLU" + bottom: "conv_2_2_conv2d_bn" + top: "conv_2_2_conv2d_relu" +} +layer { + name: "pool" + type: "Pooling" + bottom: "conv_2_2_conv2d_relu" + top: "pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 0 + } +} +layer { + name: "conv_3_3_conv2d" + type: "Convolution" + bottom: "pool" + top: "conv_3_3_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 80 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "conv_3_3_batchnorm" + type: "BatchNorm" + bottom: "conv_3_3_conv2d" + top: "conv_3_3_conv2d_bn" +} +layer { + name: "conv_3_3_relu" + type: "ReLU" + bottom: "conv_3_3_conv2d_bn" + top: "conv_3_3_conv2d_relu" +} +layer { + name: "conv_4_4_conv2d" + type: "Convolution" + bottom: "conv_3_3_conv2d_relu" + top: "conv_4_4_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "conv_4_4_batchnorm" + type: "BatchNorm" + bottom: "conv_4_4_conv2d" + top: "conv_4_4_conv2d_bn" +} +layer { + name: "conv_4_4_relu" + type: "ReLU" + bottom: "conv_4_4_conv2d_bn" + top: "conv_4_4_conv2d_relu" +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv_4_4_conv2d_relu" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 0 + } +} +layer { + name: "mixed_conv_conv2d" + type: "Convolution" + bottom: "pool1" + top: "mixed_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_conv_conv2d" + top: "mixed_conv_conv2d_bn" +} +layer { + name: "mixed_conv_relu" + type: 
"ReLU" + bottom: "mixed_conv_conv2d_bn" + top: "mixed_conv_conv2d_relu" +} +layer { + name: "mixed_tower_conv_conv2d" + type: "Convolution" + bottom: "pool1" + top: "mixed_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 48 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_tower_conv_conv2d" + top: "mixed_tower_conv_conv2d_bn" +} +layer { + name: "mixed_tower_conv_relu" + type: "ReLU" + bottom: "mixed_tower_conv_conv2d_bn" + top: "mixed_tower_conv_conv2d_relu" +} +layer { + name: "mixed_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_tower_conv_conv2d_relu" + top: "mixed_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_tower_conv_1_conv2d" + top: "mixed_tower_conv_1_conv2d_bn" +} +layer { + name: "mixed_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_tower_conv_1_conv2d_bn" + top: "mixed_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_tower_1_conv_conv2d" + type: "Convolution" + bottom: "pool1" + top: "mixed_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_tower_1_conv_conv2d" + top: "mixed_tower_1_conv_conv2d_bn" +} +layer { + name: "mixed_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_tower_1_conv_conv2d_bn" + top: "mixed_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: 
"mixed_tower_1_conv_conv2d_relu" + top: "mixed_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_tower_1_conv_1_conv2d" + top: "mixed_tower_1_conv_1_conv2d_bn" +} +layer { + name: "mixed_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_tower_1_conv_1_conv2d_bn" + top: "mixed_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_tower_1_conv_1_conv2d_relu" + top: "mixed_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_tower_1_conv_2_conv2d" + top: "mixed_tower_1_conv_2_conv2d_bn" +} +layer { + name: "mixed_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_tower_1_conv_2_conv2d_bn" + top: "mixed_tower_1_conv_2_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_pool" + type: "Pooling" + bottom: "pool1" + top: "AVE_pool_mixed_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_pool" + top: "mixed_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 32 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_tower_2_conv_conv2d" + top: "mixed_tower_2_conv_conv2d_bn" +} +layer { + name: "mixed_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_tower_2_conv_conv2d_bn" + top: 
"mixed_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_chconcat" + type: "Concat" + bottom: "mixed_conv_conv2d_relu" + bottom: "mixed_tower_conv_1_conv2d_relu" + bottom: "mixed_tower_1_conv_2_conv2d_relu" + bottom: "mixed_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_chconcat" + top: "mixed_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_conv_conv2d" + top: "mixed_1_conv_conv2d_bn" +} +layer { + name: "mixed_1_conv_relu" + type: "ReLU" + bottom: "mixed_1_conv_conv2d_bn" + top: "mixed_1_conv_conv2d_relu" +} +layer { + name: "mixed_1_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_chconcat" + top: "mixed_1_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 48 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_tower_conv_conv2d" + top: "mixed_1_tower_conv_conv2d_bn" +} +layer { + name: "mixed_1_tower_conv_relu" + type: "ReLU" + bottom: "mixed_1_tower_conv_conv2d_bn" + top: "mixed_1_tower_conv_conv2d_relu" +} +layer { + name: "mixed_1_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_1_tower_conv_conv2d_relu" + top: "mixed_1_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: 
"mixed_1_tower_conv_1_conv2d" + top: "mixed_1_tower_conv_1_conv2d_bn" +} +layer { + name: "mixed_1_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_1_tower_conv_1_conv2d_bn" + top: "mixed_1_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_1_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_chconcat" + top: "mixed_1_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_tower_1_conv_conv2d" + top: "mixed_1_tower_1_conv_conv2d_bn" +} +layer { + name: "mixed_1_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_1_tower_1_conv_conv2d_bn" + top: "mixed_1_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_1_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_1_tower_1_conv_conv2d_relu" + top: "mixed_1_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_tower_1_conv_1_conv2d" + top: "mixed_1_tower_1_conv_1_conv2d_bn" +} +layer { + name: "mixed_1_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_1_tower_1_conv_1_conv2d_bn" + top: "mixed_1_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_1_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_1_tower_1_conv_1_conv2d_relu" + top: "mixed_1_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: 
"mixed_1_tower_1_conv_2_conv2d" + top: "mixed_1_tower_1_conv_2_conv2d_bn" +} +layer { + name: "mixed_1_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_1_tower_1_conv_2_conv2d_bn" + top: "mixed_1_tower_1_conv_2_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_1_pool" + type: "Pooling" + bottom: "ch_concat_mixed_chconcat" + top: "AVE_pool_mixed_1_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_1_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_1_pool" + top: "mixed_1_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_tower_2_conv_conv2d" + top: "mixed_1_tower_2_conv_conv2d_bn" +} +layer { + name: "mixed_1_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_1_tower_2_conv_conv2d_bn" + top: "mixed_1_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_1_chconcat" + type: "Concat" + bottom: "mixed_1_conv_conv2d_relu" + bottom: "mixed_1_tower_conv_1_conv2d_relu" + bottom: "mixed_1_tower_1_conv_2_conv2d_relu" + bottom: "mixed_1_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_1_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_2_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_1_chconcat" + top: "mixed_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_conv_conv2d" + top: "mixed_2_conv_conv2d_bn" +} +layer { + name: "mixed_2_conv_relu" + type: "ReLU" + bottom: "mixed_2_conv_conv2d_bn" + top: "mixed_2_conv_conv2d_relu" +} +layer { + name: 
"mixed_2_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_1_chconcat" + top: "mixed_2_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 48 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_conv_conv2d" + top: "mixed_2_tower_conv_conv2d_bn" +} +layer { + name: "mixed_2_tower_conv_relu" + type: "ReLU" + bottom: "mixed_2_tower_conv_conv2d_bn" + top: "mixed_2_tower_conv_conv2d_relu" +} +layer { + name: "mixed_2_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_2_tower_conv_conv2d_relu" + top: "mixed_2_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_conv_1_conv2d" + top: "mixed_2_tower_conv_1_conv2d_bn" +} +layer { + name: "mixed_2_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_2_tower_conv_1_conv2d_bn" + top: "mixed_2_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_2_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_1_chconcat" + top: "mixed_2_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_1_conv_conv2d" + top: "mixed_2_tower_1_conv_conv2d_bn" +} +layer { + name: "mixed_2_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_2_tower_1_conv_conv2d_bn" + top: "mixed_2_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_2_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: 
"mixed_2_tower_1_conv_conv2d_relu" + top: "mixed_2_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_1_conv_1_conv2d" + top: "mixed_2_tower_1_conv_1_conv2d_bn" +} +layer { + name: "mixed_2_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_2_tower_1_conv_1_conv2d_bn" + top: "mixed_2_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_2_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_2_tower_1_conv_1_conv2d_relu" + top: "mixed_2_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_1_conv_2_conv2d" + top: "mixed_2_tower_1_conv_2_conv2d_bn" +} +layer { + name: "mixed_2_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_2_tower_1_conv_2_conv2d_bn" + top: "mixed_2_tower_1_conv_2_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_2_pool" + type: "Pooling" + bottom: "ch_concat_mixed_1_chconcat" + top: "AVE_pool_mixed_2_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_2_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_2_pool" + top: "mixed_2_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_2_conv_conv2d" + top: "mixed_2_tower_2_conv_conv2d_bn" +} +layer { + name: 
"mixed_2_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_2_tower_2_conv_conv2d_bn" + top: "mixed_2_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_2_chconcat" + type: "Concat" + bottom: "mixed_2_conv_conv2d_relu" + bottom: "mixed_2_tower_conv_1_conv2d_relu" + bottom: "mixed_2_tower_1_conv_2_conv2d_relu" + bottom: "mixed_2_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_2_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_3_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_2_chconcat" + top: "mixed_3_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_3_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_3_conv_conv2d" + top: "mixed_3_conv_conv2d_bn" +} +layer { + name: "mixed_3_conv_relu" + type: "ReLU" + bottom: "mixed_3_conv_conv2d_bn" + top: "mixed_3_conv_conv2d_relu" +} +layer { + name: "mixed_3_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_2_chconcat" + top: "mixed_3_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_3_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_3_tower_conv_conv2d" + top: "mixed_3_tower_conv_conv2d_bn" +} +layer { + name: "mixed_3_tower_conv_relu" + type: "ReLU" + bottom: "mixed_3_tower_conv_conv2d_bn" + top: "mixed_3_tower_conv_conv2d_relu" +} +layer { + name: "mixed_3_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_3_tower_conv_conv2d_relu" + top: "mixed_3_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + 
} + } +} +layer { + name: "mixed_3_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_3_tower_conv_1_conv2d" + top: "mixed_3_tower_conv_1_conv2d_bn" +} +layer { + name: "mixed_3_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_3_tower_conv_1_conv2d_bn" + top: "mixed_3_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_3_tower_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_3_tower_conv_1_conv2d_relu" + top: "mixed_3_tower_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_3_tower_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_3_tower_conv_2_conv2d" + top: "mixed_3_tower_conv_2_conv2d_bn" +} +layer { + name: "mixed_3_tower_conv_2_relu" + type: "ReLU" + bottom: "mixed_3_tower_conv_2_conv2d_bn" + top: "mixed_3_tower_conv_2_conv2d_relu" +} +layer { + name: "max_pool_mixed_3_pool" + type: "Pooling" + bottom: "ch_concat_mixed_2_chconcat" + top: "max_pool_mixed_3_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 0 + } +} +layer { + name: "ch_concat_mixed_3_chconcat" + type: "Concat" + bottom: "max_pool_mixed_3_pool" + bottom: "mixed_3_conv_conv2d_relu" + bottom: "mixed_3_tower_conv_2_conv2d_relu" + top: "ch_concat_mixed_3_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_4_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_3_chconcat" + top: "mixed_4_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_4_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_conv_conv2d" + top: "mixed_4_conv_conv2d_bn" +} +layer { + name: "mixed_4_conv_relu" + type: "ReLU" + bottom: "mixed_4_conv_conv2d_bn" + top: 
"mixed_4_conv_conv2d_relu" +} +layer { + name: "mixed_4_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_3_chconcat" + top: "mixed_4_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_4_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_conv_conv2d" + top: "mixed_4_tower_conv_conv2d_bn" +} +layer { + name: "mixed_4_tower_conv_relu" + type: "ReLU" + bottom: "mixed_4_tower_conv_conv2d_bn" + top: "mixed_4_tower_conv_conv2d_relu" +} +layer { + name: "mixed_4_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_4_tower_conv_conv2d_relu" + top: "mixed_4_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_4_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_conv_1_conv2d" + top: "mixed_4_tower_conv_1_conv2d_bn" +} +layer { + name: "mixed_4_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_4_tower_conv_1_conv2d_bn" + top: "mixed_4_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_4_tower_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_4_tower_conv_1_conv2d_relu" + top: "mixed_4_tower_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_4_tower_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_conv_2_conv2d" + top: "mixed_4_tower_conv_2_conv2d_bn" +} +layer { + name: "mixed_4_tower_conv_2_relu" + type: "ReLU" + bottom: "mixed_4_tower_conv_2_conv2d_bn" + top: 
"mixed_4_tower_conv_2_conv2d_relu" +} +layer { + name: "mixed_4_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_3_chconcat" + top: "mixed_4_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_4_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_1_conv_conv2d" + top: "mixed_4_tower_1_conv_conv2d_bn" +} +layer { + name: "mixed_4_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_4_tower_1_conv_conv2d_bn" + top: "mixed_4_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_4_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_4_tower_1_conv_conv2d_relu" + top: "mixed_4_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_4_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_1_conv_1_conv2d" + top: "mixed_4_tower_1_conv_1_conv2d_bn" +} +layer { + name: "mixed_4_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_4_tower_1_conv_1_conv2d_bn" + top: "mixed_4_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_4_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_4_tower_1_conv_1_conv2d_relu" + top: "mixed_4_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_4_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_1_conv_2_conv2d" + top: "mixed_4_tower_1_conv_2_conv2d_bn" +} +layer { + name: "mixed_4_tower_1_conv_2_relu" + type: "ReLU" + bottom: 
"mixed_4_tower_1_conv_2_conv2d_bn" + top: "mixed_4_tower_1_conv_2_conv2d_relu" +} +layer { + name: "mixed_4_tower_1_conv_3_conv2d" + type: "Convolution" + bottom: "mixed_4_tower_1_conv_2_conv2d_relu" + top: "mixed_4_tower_1_conv_3_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_4_tower_1_conv_3_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_1_conv_3_conv2d" + top: "mixed_4_tower_1_conv_3_conv2d_bn" +} +layer { + name: "mixed_4_tower_1_conv_3_relu" + type: "ReLU" + bottom: "mixed_4_tower_1_conv_3_conv2d_bn" + top: "mixed_4_tower_1_conv_3_conv2d_relu" +} +layer { + name: "mixed_4_tower_1_conv_4_conv2d" + type: "Convolution" + bottom: "mixed_4_tower_1_conv_3_conv2d_relu" + top: "mixed_4_tower_1_conv_4_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_4_tower_1_conv_4_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_1_conv_4_conv2d" + top: "mixed_4_tower_1_conv_4_conv2d_bn" +} +layer { + name: "mixed_4_tower_1_conv_4_relu" + type: "ReLU" + bottom: "mixed_4_tower_1_conv_4_conv2d_bn" + top: "mixed_4_tower_1_conv_4_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_4_pool" + type: "Pooling" + bottom: "ch_concat_mixed_3_chconcat" + top: "AVE_pool_mixed_4_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_4_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_4_pool" + top: "mixed_4_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: 
"gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_4_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_2_conv_conv2d" + top: "mixed_4_tower_2_conv_conv2d_bn" +} +layer { + name: "mixed_4_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_4_tower_2_conv_conv2d_bn" + top: "mixed_4_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_4_chconcat" + type: "Concat" + bottom: "mixed_4_conv_conv2d_relu" + bottom: "mixed_4_tower_conv_2_conv2d_relu" + bottom: "mixed_4_tower_1_conv_4_conv2d_relu" + bottom: "mixed_4_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_4_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_5_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_4_chconcat" + top: "mixed_5_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_5_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_conv_conv2d" + top: "mixed_5_conv_conv2d_bn" +} +layer { + name: "mixed_5_conv_relu" + type: "ReLU" + bottom: "mixed_5_conv_conv2d_bn" + top: "mixed_5_conv_conv2d_relu" +} +layer { + name: "mixed_5_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_4_chconcat" + top: "mixed_5_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_5_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_conv_conv2d" + top: "mixed_5_tower_conv_conv2d_bn" +} +layer { + name: "mixed_5_tower_conv_relu" + type: "ReLU" + bottom: "mixed_5_tower_conv_conv2d_bn" + top: "mixed_5_tower_conv_conv2d_relu" +} +layer { + name: "mixed_5_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_5_tower_conv_conv2d_relu" + top: 
"mixed_5_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_5_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_conv_1_conv2d" + top: "mixed_5_tower_conv_1_conv2d_bn" +} +layer { + name: "mixed_5_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_5_tower_conv_1_conv2d_bn" + top: "mixed_5_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_5_tower_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_5_tower_conv_1_conv2d_relu" + top: "mixed_5_tower_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_5_tower_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_conv_2_conv2d" + top: "mixed_5_tower_conv_2_conv2d_bn" +} +layer { + name: "mixed_5_tower_conv_2_relu" + type: "ReLU" + bottom: "mixed_5_tower_conv_2_conv2d_bn" + top: "mixed_5_tower_conv_2_conv2d_relu" +} +layer { + name: "mixed_5_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_4_chconcat" + top: "mixed_5_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_5_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_1_conv_conv2d" + top: "mixed_5_tower_1_conv_conv2d_bn" +} +layer { + name: "mixed_5_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_5_tower_1_conv_conv2d_bn" + top: "mixed_5_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_5_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: 
"mixed_5_tower_1_conv_conv2d_relu" + top: "mixed_5_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_5_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_1_conv_1_conv2d" + top: "mixed_5_tower_1_conv_1_conv2d_bn" +} +layer { + name: "mixed_5_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_5_tower_1_conv_1_conv2d_bn" + top: "mixed_5_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_5_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_5_tower_1_conv_1_conv2d_relu" + top: "mixed_5_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_5_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_1_conv_2_conv2d" + top: "mixed_5_tower_1_conv_2_conv2d_bn" +} +layer { + name: "mixed_5_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_5_tower_1_conv_2_conv2d_bn" + top: "mixed_5_tower_1_conv_2_conv2d_relu" +} +layer { + name: "mixed_5_tower_1_conv_3_conv2d" + type: "Convolution" + bottom: "mixed_5_tower_1_conv_2_conv2d_relu" + top: "mixed_5_tower_1_conv_3_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_5_tower_1_conv_3_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_1_conv_3_conv2d" + top: "mixed_5_tower_1_conv_3_conv2d_bn" +} +layer { + name: "mixed_5_tower_1_conv_3_relu" + type: "ReLU" + bottom: "mixed_5_tower_1_conv_3_conv2d_bn" + top: 
"mixed_5_tower_1_conv_3_conv2d_relu" +} +layer { + name: "mixed_5_tower_1_conv_4_conv2d" + type: "Convolution" + bottom: "mixed_5_tower_1_conv_3_conv2d_relu" + top: "mixed_5_tower_1_conv_4_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_5_tower_1_conv_4_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_1_conv_4_conv2d" + top: "mixed_5_tower_1_conv_4_conv2d_bn" +} +layer { + name: "mixed_5_tower_1_conv_4_relu" + type: "ReLU" + bottom: "mixed_5_tower_1_conv_4_conv2d_bn" + top: "mixed_5_tower_1_conv_4_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_5_pool" + type: "Pooling" + bottom: "ch_concat_mixed_4_chconcat" + top: "AVE_pool_mixed_5_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_5_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_5_pool" + top: "mixed_5_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_5_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_2_conv_conv2d" + top: "mixed_5_tower_2_conv_conv2d_bn" +} +layer { + name: "mixed_5_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_5_tower_2_conv_conv2d_bn" + top: "mixed_5_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_5_chconcat" + type: "Concat" + bottom: "mixed_5_conv_conv2d_relu" + bottom: "mixed_5_tower_conv_2_conv2d_relu" + bottom: "mixed_5_tower_1_conv_4_conv2d_relu" + bottom: "mixed_5_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_5_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_6_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_5_chconcat" + 
top: "mixed_6_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_6_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_conv_conv2d" + top: "mixed_6_conv_conv2d_bn" +} +layer { + name: "mixed_6_conv_relu" + type: "ReLU" + bottom: "mixed_6_conv_conv2d_bn" + top: "mixed_6_conv_conv2d_relu" +} +layer { + name: "mixed_6_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_5_chconcat" + top: "mixed_6_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_6_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_conv_conv2d" + top: "mixed_6_tower_conv_conv2d_bn" +} +layer { + name: "mixed_6_tower_conv_relu" + type: "ReLU" + bottom: "mixed_6_tower_conv_conv2d_bn" + top: "mixed_6_tower_conv_conv2d_relu" +} +layer { + name: "mixed_6_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_6_tower_conv_conv2d_relu" + top: "mixed_6_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_6_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_conv_1_conv2d" + top: "mixed_6_tower_conv_1_conv2d_bn" +} +layer { + name: "mixed_6_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_6_tower_conv_1_conv2d_bn" + top: "mixed_6_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_6_tower_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_6_tower_conv_1_conv2d_relu" + top: "mixed_6_tower_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + 
convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_6_tower_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_conv_2_conv2d" + top: "mixed_6_tower_conv_2_conv2d_bn" +} +layer { + name: "mixed_6_tower_conv_2_relu" + type: "ReLU" + bottom: "mixed_6_tower_conv_2_conv2d_bn" + top: "mixed_6_tower_conv_2_conv2d_relu" +} +layer { + name: "mixed_6_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_5_chconcat" + top: "mixed_6_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_6_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_1_conv_conv2d" + top: "mixed_6_tower_1_conv_conv2d_bn" +} +layer { + name: "mixed_6_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_6_tower_1_conv_conv2d_bn" + top: "mixed_6_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_6_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_6_tower_1_conv_conv2d_relu" + top: "mixed_6_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_6_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_1_conv_1_conv2d" + top: "mixed_6_tower_1_conv_1_conv2d_bn" +} +layer { + name: "mixed_6_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_6_tower_1_conv_1_conv2d_bn" + top: "mixed_6_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_6_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_6_tower_1_conv_1_conv2d_relu" + top: "mixed_6_tower_1_conv_2_conv2d" + param { + 
lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_6_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_1_conv_2_conv2d" + top: "mixed_6_tower_1_conv_2_conv2d_bn" +} +layer { + name: "mixed_6_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_6_tower_1_conv_2_conv2d_bn" + top: "mixed_6_tower_1_conv_2_conv2d_relu" +} +layer { + name: "mixed_6_tower_1_conv_3_conv2d" + type: "Convolution" + bottom: "mixed_6_tower_1_conv_2_conv2d_relu" + top: "mixed_6_tower_1_conv_3_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_6_tower_1_conv_3_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_1_conv_3_conv2d" + top: "mixed_6_tower_1_conv_3_conv2d_bn" +} +layer { + name: "mixed_6_tower_1_conv_3_relu" + type: "ReLU" + bottom: "mixed_6_tower_1_conv_3_conv2d_bn" + top: "mixed_6_tower_1_conv_3_conv2d_relu" +} +layer { + name: "mixed_6_tower_1_conv_4_conv2d" + type: "Convolution" + bottom: "mixed_6_tower_1_conv_3_conv2d_relu" + top: "mixed_6_tower_1_conv_4_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_6_tower_1_conv_4_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_1_conv_4_conv2d" + top: "mixed_6_tower_1_conv_4_conv2d_bn" +} +layer { + name: "mixed_6_tower_1_conv_4_relu" + type: "ReLU" + bottom: "mixed_6_tower_1_conv_4_conv2d_bn" + top: "mixed_6_tower_1_conv_4_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_6_pool" + type: "Pooling" + bottom: 
"ch_concat_mixed_5_chconcat" + top: "AVE_pool_mixed_6_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_6_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_6_pool" + top: "mixed_6_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_6_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_2_conv_conv2d" + top: "mixed_6_tower_2_conv_conv2d_bn" +} +layer { + name: "mixed_6_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_6_tower_2_conv_conv2d_bn" + top: "mixed_6_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_6_chconcat" + type: "Concat" + bottom: "mixed_6_conv_conv2d_relu" + bottom: "mixed_6_tower_conv_2_conv2d_relu" + bottom: "mixed_6_tower_1_conv_4_conv2d_relu" + bottom: "mixed_6_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_6_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_7_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_6_chconcat" + top: "mixed_7_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_7_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_conv_conv2d" + top: "mixed_7_conv_conv2d_bn" +} +layer { + name: "mixed_7_conv_relu" + type: "ReLU" + bottom: "mixed_7_conv_conv2d_bn" + top: "mixed_7_conv_conv2d_relu" +} +layer { + name: "mixed_7_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_6_chconcat" + top: "mixed_7_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + 
std: 0.01 + } + } +} +layer { + name: "mixed_7_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_conv_conv2d" + top: "mixed_7_tower_conv_conv2d_bn" +} +layer { + name: "mixed_7_tower_conv_relu" + type: "ReLU" + bottom: "mixed_7_tower_conv_conv2d_bn" + top: "mixed_7_tower_conv_conv2d_relu" +} +layer { + name: "mixed_7_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_7_tower_conv_conv2d_relu" + top: "mixed_7_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_7_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_conv_1_conv2d" + top: "mixed_7_tower_conv_1_conv2d_bn" +} +layer { + name: "mixed_7_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_7_tower_conv_1_conv2d_bn" + top: "mixed_7_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_7_tower_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_7_tower_conv_1_conv2d_relu" + top: "mixed_7_tower_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_7_tower_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_conv_2_conv2d" + top: "mixed_7_tower_conv_2_conv2d_bn" +} +layer { + name: "mixed_7_tower_conv_2_relu" + type: "ReLU" + bottom: "mixed_7_tower_conv_2_conv2d_bn" + top: "mixed_7_tower_conv_2_conv2d_relu" +} +layer { + name: "mixed_7_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_6_chconcat" + top: "mixed_7_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + 
std: 0.01 + } + } +} +layer { + name: "mixed_7_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_1_conv_conv2d" + top: "mixed_7_tower_1_conv_conv2d_bn" +} +layer { + name: "mixed_7_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_7_tower_1_conv_conv2d_bn" + top: "mixed_7_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_7_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_7_tower_1_conv_conv2d_relu" + top: "mixed_7_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_7_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_1_conv_1_conv2d" + top: "mixed_7_tower_1_conv_1_conv2d_bn" +} +layer { + name: "mixed_7_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_7_tower_1_conv_1_conv2d_bn" + top: "mixed_7_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_7_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_7_tower_1_conv_1_conv2d_relu" + top: "mixed_7_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_7_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_1_conv_2_conv2d" + top: "mixed_7_tower_1_conv_2_conv2d_bn" +} +layer { + name: "mixed_7_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_7_tower_1_conv_2_conv2d_bn" + top: "mixed_7_tower_1_conv_2_conv2d_relu" +} +layer { + name: "mixed_7_tower_1_conv_3_conv2d" + type: "Convolution" + bottom: "mixed_7_tower_1_conv_2_conv2d_relu" + top: "mixed_7_tower_1_conv_3_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + 
weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_7_tower_1_conv_3_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_1_conv_3_conv2d" + top: "mixed_7_tower_1_conv_3_conv2d_bn" +} +layer { + name: "mixed_7_tower_1_conv_3_relu" + type: "ReLU" + bottom: "mixed_7_tower_1_conv_3_conv2d_bn" + top: "mixed_7_tower_1_conv_3_conv2d_relu" +} +layer { + name: "mixed_7_tower_1_conv_4_conv2d" + type: "Convolution" + bottom: "mixed_7_tower_1_conv_3_conv2d_relu" + top: "mixed_7_tower_1_conv_4_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_7_tower_1_conv_4_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_1_conv_4_conv2d" + top: "mixed_7_tower_1_conv_4_conv2d_bn" +} +layer { + name: "mixed_7_tower_1_conv_4_relu" + type: "ReLU" + bottom: "mixed_7_tower_1_conv_4_conv2d_bn" + top: "mixed_7_tower_1_conv_4_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_7_pool" + type: "Pooling" + bottom: "ch_concat_mixed_6_chconcat" + top: "AVE_pool_mixed_7_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_7_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_7_pool" + top: "mixed_7_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_7_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_2_conv_conv2d" + top: "mixed_7_tower_2_conv_conv2d_bn" +} +layer { + name: "mixed_7_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_7_tower_2_conv_conv2d_bn" + top: "mixed_7_tower_2_conv_conv2d_relu" +} +layer { + name: 
"ch_concat_mixed_7_chconcat" + type: "Concat" + bottom: "mixed_7_conv_conv2d_relu" + bottom: "mixed_7_tower_conv_2_conv2d_relu" + bottom: "mixed_7_tower_1_conv_4_conv2d_relu" + bottom: "mixed_7_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_7_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_8_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_7_chconcat" + top: "mixed_8_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_8_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_8_tower_conv_conv2d" + top: "mixed_8_tower_conv_conv2d_bn" +} +layer { + name: "mixed_8_tower_conv_relu" + type: "ReLU" + bottom: "mixed_8_tower_conv_conv2d_bn" + top: "mixed_8_tower_conv_conv2d_relu" +} +layer { + name: "mixed_8_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_8_tower_conv_conv2d_relu" + top: "mixed_8_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 320 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_8_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_8_tower_conv_1_conv2d" + top: "mixed_8_tower_conv_1_conv2d_bn" +} +layer { + name: "mixed_8_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_8_tower_conv_1_conv2d_bn" + top: "mixed_8_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_8_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_7_chconcat" + top: "mixed_8_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_8_tower_1_conv_batchnorm" + type: "BatchNorm" + 
bottom: "mixed_8_tower_1_conv_conv2d" + top: "mixed_8_tower_1_conv_conv2d_bn" +} +layer { + name: "mixed_8_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_8_tower_1_conv_conv2d_bn" + top: "mixed_8_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_8_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_8_tower_1_conv_conv2d_relu" + top: "mixed_8_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_8_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_8_tower_1_conv_1_conv2d" + top: "mixed_8_tower_1_conv_1_conv2d_bn" +} +layer { + name: "mixed_8_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_8_tower_1_conv_1_conv2d_bn" + top: "mixed_8_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_8_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_8_tower_1_conv_1_conv2d_relu" + top: "mixed_8_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_8_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_8_tower_1_conv_2_conv2d" + top: "mixed_8_tower_1_conv_2_conv2d_bn" +} +layer { + name: "mixed_8_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_8_tower_1_conv_2_conv2d_bn" + top: "mixed_8_tower_1_conv_2_conv2d_relu" +} +layer { + name: "mixed_8_tower_1_conv_3_conv2d" + type: "Convolution" + bottom: "mixed_8_tower_1_conv_2_conv2d_relu" + top: "mixed_8_tower_1_conv_3_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + 
name: "mixed_8_tower_1_conv_3_batchnorm" + type: "BatchNorm" + bottom: "mixed_8_tower_1_conv_3_conv2d" + top: "mixed_8_tower_1_conv_3_conv2d_bn" +} +layer { + name: "mixed_8_tower_1_conv_3_relu" + type: "ReLU" + bottom: "mixed_8_tower_1_conv_3_conv2d_bn" + top: "mixed_8_tower_1_conv_3_conv2d_relu" +} +layer { + name: "MAX_pool_mixed_8_pool" + type: "Pooling" + bottom: "ch_concat_mixed_7_chconcat" + top: "MAX_pool_mixed_8_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 0 + } +} +layer { + name: "ch_concat_mixed_8_chconcat" + type: "Concat" + bottom: "mixed_8_tower_conv_1_conv2d_relu" + bottom: "mixed_8_tower_1_conv_3_conv2d_relu" + bottom: "MAX_pool_mixed_8_pool" + top: "ch_concat_mixed_8_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_9_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_8_chconcat" + top: "mixed_9_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 320 + bias_term: false + pad: 0 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + kernel_h: 1 + kernel_w: 1 + } +} +layer { + name: "mixed_9_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_conv_conv2d" + top: "mixed_9_conv_conv2d_bn" +} +layer { + name: "mixed_9_conv_relu" + type: "ReLU" + bottom: "mixed_9_conv_conv2d_bn" + top: "mixed_9_conv_conv2d_relu" +} +layer { + name: "mixed_9_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_8_chconcat" + top: "mixed_9_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_9_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_conv_conv2d" + top: "mixed_9_tower_conv_conv2d_bn" +} +layer { + name: "mixed_9_tower_conv_relu" + type: "ReLU" + bottom: "mixed_9_tower_conv_conv2d_bn" + top: "mixed_9_tower_conv_conv2d_relu" +} 
+layer { + name: "mixed_9_tower_mixed_conv_conv2d" + type: "Convolution" + bottom: "mixed_9_tower_conv_conv2d_relu" + top: "mixed_9_tower_mixed_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 1 + kernel_h: 1 + kernel_w: 3 + } +} +layer { + name: "mixed_9_tower_mixed_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_mixed_conv_conv2d" + top: "mixed_9_tower_mixed_conv_conv2d_bn" +} +layer { + name: "mixed_9_tower_mixed_conv_relu" + type: "ReLU" + bottom: "mixed_9_tower_mixed_conv_conv2d_bn" + top: "mixed_9_tower_mixed_conv_conv2d_relu" +} +layer { + name: "mixed_9_tower_mixed_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_9_tower_conv_conv2d_relu" + top: "mixed_9_tower_mixed_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 1 + pad_w: 0 + kernel_h: 3 + kernel_w: 1 + } +} +layer { + name: "mixed_9_tower_mixed_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_mixed_conv_1_conv2d" + top: "mixed_9_tower_mixed_conv_1_conv2d_bn" +} +layer { + name: "mixed_9_tower_mixed_conv_1_relu" + type: "ReLU" + bottom: "mixed_9_tower_mixed_conv_1_conv2d_bn" + top: "mixed_9_tower_mixed_conv_1_conv2d_relu" +} +layer { + name: "mixed_9_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_8_chconcat" + top: "mixed_9_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 448 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_9_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_1_conv_conv2d" + top: "mixed_9_tower_1_conv_conv2d_bn" +} +layer { + name: "mixed_9_tower_1_conv_relu" + type: "ReLU" + 
bottom: "mixed_9_tower_1_conv_conv2d_bn" + top: "mixed_9_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_9_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_9_tower_1_conv_conv2d_relu" + top: "mixed_9_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_9_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_1_conv_1_conv2d" + top: "mixed_9_tower_1_conv_1_conv2d_bn" +} +layer { + name: "mixed_9_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_9_tower_1_conv_1_conv2d_bn" + top: "mixed_9_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_9_tower_1_mixed_conv_conv2d" + type: "Convolution" + bottom: "mixed_9_tower_1_conv_1_conv2d_relu" + top: "mixed_9_tower_1_mixed_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 1 + kernel_h: 1 + kernel_w: 3 + } +} +layer { + name: "mixed_9_tower_1_mixed_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_1_mixed_conv_conv2d" + top: "mixed_9_tower_1_mixed_conv_conv2d_bn" +} +layer { + name: "mixed_9_tower_1_mixed_conv_relu" + type: "ReLU" + bottom: "mixed_9_tower_1_mixed_conv_conv2d_bn" + top: "mixed_9_tower_1_mixed_conv_conv2d_relu" +} +layer { + name: "mixed_9_tower_1_mixed_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_9_tower_1_conv_1_conv2d_relu" + top: "mixed_9_tower_1_mixed_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 1 + pad_w: 0 + kernel_h: 3 + kernel_w: 1 + } +} +layer { + name: "mixed_9_tower_1_mixed_conv_1_batchnorm" + type: "BatchNorm" + bottom: 
"mixed_9_tower_1_mixed_conv_1_conv2d" + top: "mixed_9_tower_1_mixed_conv_1_conv2d_bn" +} +layer { + name: "mixed_9_tower_1_mixed_conv_1_relu" + type: "ReLU" + bottom: "mixed_9_tower_1_mixed_conv_1_conv2d_bn" + top: "mixed_9_tower_1_mixed_conv_1_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_9_pool" + type: "Pooling" + bottom: "ch_concat_mixed_8_chconcat" + top: "AVE_pool_mixed_9_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_9_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_9_pool" + top: "mixed_9_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_9_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_2_conv_conv2d" + top: "mixed_9_tower_2_conv_conv2d_bn" +} +layer { + name: "mixed_9_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_9_tower_2_conv_conv2d_bn" + top: "mixed_9_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_9_chconcat" + type: "Concat" + bottom: "mixed_9_conv_conv2d_relu" + bottom: "mixed_9_tower_mixed_conv_conv2d_relu" + bottom: "mixed_9_tower_mixed_conv_1_conv2d_relu" + bottom: "mixed_9_tower_1_mixed_conv_conv2d_relu" + bottom: "mixed_9_tower_1_mixed_conv_1_conv2d_relu" + bottom: "mixed_9_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_9_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_10_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_9_chconcat" + top: "mixed_10_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 320 + bias_term: false + pad: 0 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + kernel_h: 1 + kernel_w: 1 + } +} +layer { + name: "mixed_10_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_conv_conv2d" + top: 
"mixed_10_conv_conv2d_bn" +} +layer { + name: "mixed_10_conv_relu" + type: "ReLU" + bottom: "mixed_10_conv_conv2d_bn" + top: "mixed_10_conv_conv2d_relu" +} +layer { + name: "mixed_10_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_9_chconcat" + top: "mixed_10_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_10_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_conv_conv2d" + top: "mixed_10_tower_conv_conv2d_bn" +} +layer { + name: "mixed_10_tower_conv_relu" + type: "ReLU" + bottom: "mixed_10_tower_conv_conv2d_bn" + top: "mixed_10_tower_conv_conv2d_relu" +} +layer { + name: "mixed_10_tower_mixed_conv_conv2d" + type: "Convolution" + bottom: "mixed_10_tower_conv_conv2d_relu" + top: "mixed_10_tower_mixed_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 1 + kernel_h: 1 + kernel_w: 3 + } +} +layer { + name: "mixed_10_tower_mixed_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_mixed_conv_conv2d" + top: "mixed_10_tower_mixed_conv_conv2d_bn" +} +layer { + name: "mixed_10_tower_mixed_conv_relu" + type: "ReLU" + bottom: "mixed_10_tower_mixed_conv_conv2d_bn" + top: "mixed_10_tower_mixed_conv_conv2d_relu" +} +layer { + name: "mixed_10_tower_mixed_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_10_tower_conv_conv2d_relu" + top: "mixed_10_tower_mixed_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 1 + pad_w: 0 + kernel_h: 3 + kernel_w: 1 + } +} +layer { + name: "mixed_10_tower_mixed_conv_1_batchnorm" + type: "BatchNorm" + bottom: 
"mixed_10_tower_mixed_conv_1_conv2d" + top: "mixed_10_tower_mixed_conv_1_conv2d_bn" +} +layer { + name: "mixed_10_tower_mixed_conv_1_relu" + type: "ReLU" + bottom: "mixed_10_tower_mixed_conv_1_conv2d_bn" + top: "mixed_10_tower_mixed_conv_1_conv2d_relu" +} +layer { + name: "mixed_10_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_9_chconcat" + top: "mixed_10_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 448 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_10_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_1_conv_conv2d" + top: "mixed_10_tower_1_conv_conv2d_bn" +} +layer { + name: "mixed_10_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_10_tower_1_conv_conv2d_bn" + top: "mixed_10_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_10_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_10_tower_1_conv_conv2d_relu" + top: "mixed_10_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_10_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_1_conv_1_conv2d" + top: "mixed_10_tower_1_conv_1_conv2d_bn" +} +layer { + name: "mixed_10_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_10_tower_1_conv_1_conv2d_bn" + top: "mixed_10_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_10_tower_1_mixed_conv_conv2d" + type: "Convolution" + bottom: "mixed_10_tower_1_conv_1_conv2d_relu" + top: "mixed_10_tower_1_mixed_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 1 + kernel_h: 1 + kernel_w: 3 + } +} +layer { 
+ name: "mixed_10_tower_1_mixed_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_1_mixed_conv_conv2d" + top: "mixed_10_tower_1_mixed_conv_conv2d_bn" +} +layer { + name: "mixed_10_tower_1_mixed_conv_relu" + type: "ReLU" + bottom: "mixed_10_tower_1_mixed_conv_conv2d_bn" + top: "mixed_10_tower_1_mixed_conv_conv2d_relu" +} +layer { + name: "mixed_10_tower_1_mixed_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_10_tower_1_conv_1_conv2d_relu" + top: "mixed_10_tower_1_mixed_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 1 + pad_w: 0 + kernel_h: 3 + kernel_w: 1 + } +} +layer { + name: "mixed_10_tower_1_mixed_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_1_mixed_conv_1_conv2d" + top: "mixed_10_tower_1_mixed_conv_1_conv2d_bn" +} +layer { + name: "mixed_10_tower_1_mixed_conv_1_relu" + type: "ReLU" + bottom: "mixed_10_tower_1_mixed_conv_1_conv2d_bn" + top: "mixed_10_tower_1_mixed_conv_1_conv2d_relu" +} +layer { + name: "MAX_pool_mixed_10_pool" + type: "Pooling" + bottom: "ch_concat_mixed_9_chconcat" + top: "MAX_pool_mixed_10_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_10_tower_2_conv_conv2d" + type: "Convolution" + bottom: "MAX_pool_mixed_10_pool" + top: "mixed_10_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_10_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_2_conv_conv2d" + top: "mixed_10_tower_2_conv_conv2d_bn" +} +layer { + name: "mixed_10_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_10_tower_2_conv_conv2d_bn" + top: "mixed_10_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_10_chconcat" + 
type: "Concat" + bottom: "mixed_10_conv_conv2d_relu" + bottom: "mixed_10_tower_mixed_conv_conv2d_relu" + bottom: "mixed_10_tower_mixed_conv_1_conv2d_relu" + bottom: "mixed_10_tower_1_mixed_conv_conv2d_relu" + bottom: "mixed_10_tower_1_mixed_conv_1_conv2d_relu" + bottom: "mixed_10_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_10_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "global_pool" + type: "Pooling" + bottom: "ch_concat_mixed_10_chconcat" + top: "global_pool" + pooling_param { + pool: AVE + kernel_size: 8 + stride: 1 + pad: 0 + } +} +layer { + name: "drop" + type: "Dropout" + bottom: "global_pool" + top: "global_pool" + dropout_param { + dropout_ratio: 0.8 + } +} +layer { + name: "flatten" + type: "Flatten" + bottom: "global_pool" + top: "flatten" +} +layer { + name: "fc1" + type: "InnerProduct" + bottom: "flatten" + top: "fc1" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + inner_product_param { + num_output: 1000 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "loss" + type: "SoftmaxWithLoss" + bottom: "fc1" + bottom: "label" + top: "loss" +} +layer { + name: "loss3/top-1" + type: "Accuracy" + bottom: "fc1" + bottom: "label" + top: "loss3/top-1" + include { + phase: TEST + } +} +layer { + name: "loss3/top-5" + type: "Accuracy" + bottom: "fc1" + bottom: "label" + top: "loss3/top-5" + include { + phase: TEST + } + accuracy_param { + top_k: 5 + } +} \ No newline at end of file diff --git a/models/intel_optimized_models/googlenet_v3/mkldnn/nodata_solver.prototxt b/models/intel_optimized_models/googlenet_v3/mkldnn/nodata_solver.prototxt new file mode 100644 index 000000000..f143ca98f --- /dev/null +++ b/models/intel_optimized_models/googlenet_v3/mkldnn/nodata_solver.prototxt @@ -0,0 +1,16 @@ +net: "models/intel_optimized_models/googlenet_v3/mkldnn/nodata_train_val.prototxt" +test_iter: 1000 +test_interval: 10000 
+test_initialization: false +display: 40 +average_loss: 40 +base_lr: 0.045 +lr_policy: "step" +stepsize: 6400 +gamma: 0.96 +max_iter: 1200000 +momentum: 0.9 +weight_decay: 0.0002 +snapshot: 50000 +snapshot_prefix: "models/intel_optimized_models/googlenet_v3/mkldnn/googlenet_v3_mkldnn_nodata" +solver_mode: CPU diff --git a/models/intel_optimized_models/googlenet_v3/mkldnn/nodata_train_val.prototxt b/models/intel_optimized_models/googlenet_v3/mkldnn/nodata_train_val.prototxt new file mode 100644 index 000000000..aa56e7dfc --- /dev/null +++ b/models/intel_optimized_models/googlenet_v3/mkldnn/nodata_train_val.prototxt @@ -0,0 +1,3577 @@ +name: "InceptionV3" + +layer { + name: "data" + type: "DummyData" + top: "data" + include { + phase: TRAIN + } + dummy_data_param { + shape: { dim: 22 dim: 3 dim: 299 dim: 299 } + data_filler { + type: "constant" + value: 0.01 + } + } +} +layer { + name: "data" + type: "DummyData" + top: "label" + include { + phase: TRAIN + } + dummy_data_param { + shape: { dim: 22 } + data_filler { + type: "constant" + } + } +} +#-------------------- + + +layer { + name: "conv_conv2d" + type: "Convolution" + bottom: "data" + top: "conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 32 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "conv_batchnorm" + type: "BatchNorm" + bottom: "conv_conv2d" + top: "conv_conv2d_bn" +} +layer { + name: "conv_relu" + type: "ReLU" + bottom: "conv_conv2d_bn" + top: "conv_conv2d_relu" +} +layer { + name: "conv_1_1_conv2d" + type: "Convolution" + bottom: "conv_conv2d_relu" + top: "conv_1_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 32 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "conv_1_1_batchnorm" + type: "BatchNorm" + bottom: "conv_1_1_conv2d" + top: 
"conv_1_1_conv2d_bn" +} +layer { + name: "conv_1_1_relu" + type: "ReLU" + bottom: "conv_1_1_conv2d_bn" + top: "conv_1_1_conv2d_relu" +} +layer { + name: "conv_2_2_conv2d" + type: "Convolution" + bottom: "conv_1_1_conv2d_relu" + top: "conv_2_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "conv_2_2_batchnorm" + type: "BatchNorm" + bottom: "conv_2_2_conv2d" + top: "conv_2_2_conv2d_bn" +} +layer { + name: "conv_2_2_relu" + type: "ReLU" + bottom: "conv_2_2_conv2d_bn" + top: "conv_2_2_conv2d_relu" +} +layer { + name: "pool" + type: "Pooling" + bottom: "conv_2_2_conv2d_relu" + top: "pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 0 + } +} +layer { + name: "conv_3_3_conv2d" + type: "Convolution" + bottom: "pool" + top: "conv_3_3_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 80 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "conv_3_3_batchnorm" + type: "BatchNorm" + bottom: "conv_3_3_conv2d" + top: "conv_3_3_conv2d_bn" +} +layer { + name: "conv_3_3_relu" + type: "ReLU" + bottom: "conv_3_3_conv2d_bn" + top: "conv_3_3_conv2d_relu" +} +layer { + name: "conv_4_4_conv2d" + type: "Convolution" + bottom: "conv_3_3_conv2d_relu" + top: "conv_4_4_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "conv_4_4_batchnorm" + type: "BatchNorm" + bottom: "conv_4_4_conv2d" + top: "conv_4_4_conv2d_bn" +} +layer { + name: "conv_4_4_relu" + type: "ReLU" + bottom: "conv_4_4_conv2d_bn" + top: "conv_4_4_conv2d_relu" +} +layer { + name: "pool1" + type: "Pooling" + bottom: 
"conv_4_4_conv2d_relu" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 0 + } +} +layer { + name: "mixed_conv_conv2d" + type: "Convolution" + bottom: "pool1" + top: "mixed_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_conv_conv2d" + top: "mixed_conv_conv2d_bn" +} +layer { + name: "mixed_conv_relu" + type: "ReLU" + bottom: "mixed_conv_conv2d_bn" + top: "mixed_conv_conv2d_relu" +} +layer { + name: "mixed_tower_conv_conv2d" + type: "Convolution" + bottom: "pool1" + top: "mixed_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 48 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_tower_conv_conv2d" + top: "mixed_tower_conv_conv2d_bn" +} +layer { + name: "mixed_tower_conv_relu" + type: "ReLU" + bottom: "mixed_tower_conv_conv2d_bn" + top: "mixed_tower_conv_conv2d_relu" +} +layer { + name: "mixed_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_tower_conv_conv2d_relu" + top: "mixed_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_tower_conv_1_conv2d" + top: "mixed_tower_conv_1_conv2d_bn" +} +layer { + name: "mixed_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_tower_conv_1_conv2d_bn" + top: "mixed_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_tower_1_conv_conv2d" + type: "Convolution" + bottom: "pool1" + top: 
"mixed_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_tower_1_conv_conv2d" + top: "mixed_tower_1_conv_conv2d_bn" +} +layer { + name: "mixed_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_tower_1_conv_conv2d_bn" + top: "mixed_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_tower_1_conv_conv2d_relu" + top: "mixed_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_tower_1_conv_1_conv2d" + top: "mixed_tower_1_conv_1_conv2d_bn" +} +layer { + name: "mixed_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_tower_1_conv_1_conv2d_bn" + top: "mixed_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_tower_1_conv_1_conv2d_relu" + top: "mixed_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_tower_1_conv_2_conv2d" + top: "mixed_tower_1_conv_2_conv2d_bn" +} +layer { + name: "mixed_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_tower_1_conv_2_conv2d_bn" + top: "mixed_tower_1_conv_2_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_pool" + type: "Pooling" + bottom: "pool1" + top: "AVE_pool_mixed_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 
+ } +} +layer { + name: "mixed_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_pool" + top: "mixed_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 32 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_tower_2_conv_conv2d" + top: "mixed_tower_2_conv_conv2d_bn" +} +layer { + name: "mixed_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_tower_2_conv_conv2d_bn" + top: "mixed_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_chconcat" + type: "Concat" + bottom: "mixed_conv_conv2d_relu" + bottom: "mixed_tower_conv_1_conv2d_relu" + bottom: "mixed_tower_1_conv_2_conv2d_relu" + bottom: "mixed_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_chconcat" + top: "mixed_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_conv_conv2d" + top: "mixed_1_conv_conv2d_bn" +} +layer { + name: "mixed_1_conv_relu" + type: "ReLU" + bottom: "mixed_1_conv_conv2d_bn" + top: "mixed_1_conv_conv2d_relu" +} +layer { + name: "mixed_1_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_chconcat" + top: "mixed_1_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 48 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_tower_conv_conv2d" + top: "mixed_1_tower_conv_conv2d_bn" +} 
+layer { + name: "mixed_1_tower_conv_relu" + type: "ReLU" + bottom: "mixed_1_tower_conv_conv2d_bn" + top: "mixed_1_tower_conv_conv2d_relu" +} +layer { + name: "mixed_1_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_1_tower_conv_conv2d_relu" + top: "mixed_1_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_tower_conv_1_conv2d" + top: "mixed_1_tower_conv_1_conv2d_bn" +} +layer { + name: "mixed_1_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_1_tower_conv_1_conv2d_bn" + top: "mixed_1_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_1_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_chconcat" + top: "mixed_1_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_tower_1_conv_conv2d" + top: "mixed_1_tower_1_conv_conv2d_bn" +} +layer { + name: "mixed_1_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_1_tower_1_conv_conv2d_bn" + top: "mixed_1_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_1_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_1_tower_1_conv_conv2d_relu" + top: "mixed_1_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_tower_1_conv_1_conv2d" + top: "mixed_1_tower_1_conv_1_conv2d_bn" +} +layer { + name: 
"mixed_1_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_1_tower_1_conv_1_conv2d_bn" + top: "mixed_1_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_1_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_1_tower_1_conv_1_conv2d_relu" + top: "mixed_1_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_tower_1_conv_2_conv2d" + top: "mixed_1_tower_1_conv_2_conv2d_bn" +} +layer { + name: "mixed_1_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_1_tower_1_conv_2_conv2d_bn" + top: "mixed_1_tower_1_conv_2_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_1_pool" + type: "Pooling" + bottom: "ch_concat_mixed_chconcat" + top: "AVE_pool_mixed_1_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_1_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_1_pool" + top: "mixed_1_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_1_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_1_tower_2_conv_conv2d" + top: "mixed_1_tower_2_conv_conv2d_bn" +} +layer { + name: "mixed_1_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_1_tower_2_conv_conv2d_bn" + top: "mixed_1_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_1_chconcat" + type: "Concat" + bottom: "mixed_1_conv_conv2d_relu" + bottom: "mixed_1_tower_conv_1_conv2d_relu" + bottom: "mixed_1_tower_1_conv_2_conv2d_relu" + bottom: "mixed_1_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_1_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: 
"mixed_2_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_1_chconcat" + top: "mixed_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_conv_conv2d" + top: "mixed_2_conv_conv2d_bn" +} +layer { + name: "mixed_2_conv_relu" + type: "ReLU" + bottom: "mixed_2_conv_conv2d_bn" + top: "mixed_2_conv_conv2d_relu" +} +layer { + name: "mixed_2_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_1_chconcat" + top: "mixed_2_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 48 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_conv_conv2d" + top: "mixed_2_tower_conv_conv2d_bn" +} +layer { + name: "mixed_2_tower_conv_relu" + type: "ReLU" + bottom: "mixed_2_tower_conv_conv2d_bn" + top: "mixed_2_tower_conv_conv2d_relu" +} +layer { + name: "mixed_2_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_2_tower_conv_conv2d_relu" + top: "mixed_2_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_conv_1_conv2d" + top: "mixed_2_tower_conv_1_conv2d_bn" +} +layer { + name: "mixed_2_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_2_tower_conv_1_conv2d_bn" + top: "mixed_2_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_2_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_1_chconcat" + top: "mixed_2_tower_1_conv_conv2d" 
+ param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_1_conv_conv2d" + top: "mixed_2_tower_1_conv_conv2d_bn" +} +layer { + name: "mixed_2_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_2_tower_1_conv_conv2d_bn" + top: "mixed_2_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_2_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_2_tower_1_conv_conv2d_relu" + top: "mixed_2_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_1_conv_1_conv2d" + top: "mixed_2_tower_1_conv_1_conv2d_bn" +} +layer { + name: "mixed_2_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_2_tower_1_conv_1_conv2d_bn" + top: "mixed_2_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_2_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_2_tower_1_conv_1_conv2d_relu" + top: "mixed_2_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_1_conv_2_conv2d" + top: "mixed_2_tower_1_conv_2_conv2d_bn" +} +layer { + name: "mixed_2_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_2_tower_1_conv_2_conv2d_bn" + top: "mixed_2_tower_1_conv_2_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_2_pool" + type: "Pooling" + bottom: "ch_concat_mixed_1_chconcat" + top: "AVE_pool_mixed_2_pool" + pooling_param { + 
pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_2_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_2_pool" + top: "mixed_2_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_2_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_2_tower_2_conv_conv2d" + top: "mixed_2_tower_2_conv_conv2d_bn" +} +layer { + name: "mixed_2_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_2_tower_2_conv_conv2d_bn" + top: "mixed_2_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_2_chconcat" + type: "Concat" + bottom: "mixed_2_conv_conv2d_relu" + bottom: "mixed_2_tower_conv_1_conv2d_relu" + bottom: "mixed_2_tower_1_conv_2_conv2d_relu" + bottom: "mixed_2_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_2_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_3_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_2_chconcat" + top: "mixed_3_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_3_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_3_conv_conv2d" + top: "mixed_3_conv_conv2d_bn" +} +layer { + name: "mixed_3_conv_relu" + type: "ReLU" + bottom: "mixed_3_conv_conv2d_bn" + top: "mixed_3_conv_conv2d_relu" +} +layer { + name: "mixed_3_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_2_chconcat" + top: "mixed_3_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 64 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_3_tower_conv_batchnorm" + type: 
"BatchNorm" + bottom: "mixed_3_tower_conv_conv2d" + top: "mixed_3_tower_conv_conv2d_bn" +} +layer { + name: "mixed_3_tower_conv_relu" + type: "ReLU" + bottom: "mixed_3_tower_conv_conv2d_bn" + top: "mixed_3_tower_conv_conv2d_relu" +} +layer { + name: "mixed_3_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_3_tower_conv_conv2d_relu" + top: "mixed_3_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_3_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_3_tower_conv_1_conv2d" + top: "mixed_3_tower_conv_1_conv2d_bn" +} +layer { + name: "mixed_3_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_3_tower_conv_1_conv2d_bn" + top: "mixed_3_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_3_tower_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_3_tower_conv_1_conv2d_relu" + top: "mixed_3_tower_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 96 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_3_tower_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_3_tower_conv_2_conv2d" + top: "mixed_3_tower_conv_2_conv2d_bn" +} +layer { + name: "mixed_3_tower_conv_2_relu" + type: "ReLU" + bottom: "mixed_3_tower_conv_2_conv2d_bn" + top: "mixed_3_tower_conv_2_conv2d_relu" +} +layer { + name: "max_pool_mixed_3_pool" + type: "Pooling" + bottom: "ch_concat_mixed_2_chconcat" + top: "max_pool_mixed_3_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 0 + } +} +layer { + name: "ch_concat_mixed_3_chconcat" + type: "Concat" + bottom: "max_pool_mixed_3_pool" + bottom: "mixed_3_conv_conv2d_relu" + bottom: "mixed_3_tower_conv_2_conv2d_relu" + top: "ch_concat_mixed_3_chconcat" + concat_param { + axis: 1 + } +} 
+layer { + name: "mixed_4_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_3_chconcat" + top: "mixed_4_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_4_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_conv_conv2d" + top: "mixed_4_conv_conv2d_bn" +} +layer { + name: "mixed_4_conv_relu" + type: "ReLU" + bottom: "mixed_4_conv_conv2d_bn" + top: "mixed_4_conv_conv2d_relu" +} +layer { + name: "mixed_4_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_3_chconcat" + top: "mixed_4_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_4_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_conv_conv2d" + top: "mixed_4_tower_conv_conv2d_bn" +} +layer { + name: "mixed_4_tower_conv_relu" + type: "ReLU" + bottom: "mixed_4_tower_conv_conv2d_bn" + top: "mixed_4_tower_conv_conv2d_relu" +} +layer { + name: "mixed_4_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_4_tower_conv_conv2d_relu" + top: "mixed_4_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_4_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_conv_1_conv2d" + top: "mixed_4_tower_conv_1_conv2d_bn" +} +layer { + name: "mixed_4_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_4_tower_conv_1_conv2d_bn" + top: "mixed_4_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_4_tower_conv_2_conv2d" + type: "Convolution" + bottom: 
"mixed_4_tower_conv_1_conv2d_relu" + top: "mixed_4_tower_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_4_tower_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_conv_2_conv2d" + top: "mixed_4_tower_conv_2_conv2d_bn" +} +layer { + name: "mixed_4_tower_conv_2_relu" + type: "ReLU" + bottom: "mixed_4_tower_conv_2_conv2d_bn" + top: "mixed_4_tower_conv_2_conv2d_relu" +} +layer { + name: "mixed_4_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_3_chconcat" + top: "mixed_4_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_4_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_1_conv_conv2d" + top: "mixed_4_tower_1_conv_conv2d_bn" +} +layer { + name: "mixed_4_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_4_tower_1_conv_conv2d_bn" + top: "mixed_4_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_4_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_4_tower_1_conv_conv2d_relu" + top: "mixed_4_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_4_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_1_conv_1_conv2d" + top: "mixed_4_tower_1_conv_1_conv2d_bn" +} +layer { + name: "mixed_4_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_4_tower_1_conv_1_conv2d_bn" + top: "mixed_4_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_4_tower_1_conv_2_conv2d" + 
type: "Convolution" + bottom: "mixed_4_tower_1_conv_1_conv2d_relu" + top: "mixed_4_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_4_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_1_conv_2_conv2d" + top: "mixed_4_tower_1_conv_2_conv2d_bn" +} +layer { + name: "mixed_4_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_4_tower_1_conv_2_conv2d_bn" + top: "mixed_4_tower_1_conv_2_conv2d_relu" +} +layer { + name: "mixed_4_tower_1_conv_3_conv2d" + type: "Convolution" + bottom: "mixed_4_tower_1_conv_2_conv2d_relu" + top: "mixed_4_tower_1_conv_3_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 128 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_4_tower_1_conv_3_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_1_conv_3_conv2d" + top: "mixed_4_tower_1_conv_3_conv2d_bn" +} +layer { + name: "mixed_4_tower_1_conv_3_relu" + type: "ReLU" + bottom: "mixed_4_tower_1_conv_3_conv2d_bn" + top: "mixed_4_tower_1_conv_3_conv2d_relu" +} +layer { + name: "mixed_4_tower_1_conv_4_conv2d" + type: "Convolution" + bottom: "mixed_4_tower_1_conv_3_conv2d_relu" + top: "mixed_4_tower_1_conv_4_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_4_tower_1_conv_4_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_1_conv_4_conv2d" + top: "mixed_4_tower_1_conv_4_conv2d_bn" +} +layer { + name: "mixed_4_tower_1_conv_4_relu" + type: "ReLU" + bottom: "mixed_4_tower_1_conv_4_conv2d_bn" + 
top: "mixed_4_tower_1_conv_4_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_4_pool" + type: "Pooling" + bottom: "ch_concat_mixed_3_chconcat" + top: "AVE_pool_mixed_4_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_4_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_4_pool" + top: "mixed_4_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_4_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_4_tower_2_conv_conv2d" + top: "mixed_4_tower_2_conv_conv2d_bn" +} +layer { + name: "mixed_4_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_4_tower_2_conv_conv2d_bn" + top: "mixed_4_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_4_chconcat" + type: "Concat" + bottom: "mixed_4_conv_conv2d_relu" + bottom: "mixed_4_tower_conv_2_conv2d_relu" + bottom: "mixed_4_tower_1_conv_4_conv2d_relu" + bottom: "mixed_4_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_4_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_5_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_4_chconcat" + top: "mixed_5_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_5_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_conv_conv2d" + top: "mixed_5_conv_conv2d_bn" +} +layer { + name: "mixed_5_conv_relu" + type: "ReLU" + bottom: "mixed_5_conv_conv2d_bn" + top: "mixed_5_conv_conv2d_relu" +} +layer { + name: "mixed_5_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_4_chconcat" + top: "mixed_5_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + 
num_output: 160 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_5_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_conv_conv2d" + top: "mixed_5_tower_conv_conv2d_bn" +} +layer { + name: "mixed_5_tower_conv_relu" + type: "ReLU" + bottom: "mixed_5_tower_conv_conv2d_bn" + top: "mixed_5_tower_conv_conv2d_relu" +} +layer { + name: "mixed_5_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_5_tower_conv_conv2d_relu" + top: "mixed_5_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_5_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_conv_1_conv2d" + top: "mixed_5_tower_conv_1_conv2d_bn" +} +layer { + name: "mixed_5_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_5_tower_conv_1_conv2d_bn" + top: "mixed_5_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_5_tower_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_5_tower_conv_1_conv2d_relu" + top: "mixed_5_tower_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_5_tower_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_conv_2_conv2d" + top: "mixed_5_tower_conv_2_conv2d_bn" +} +layer { + name: "mixed_5_tower_conv_2_relu" + type: "ReLU" + bottom: "mixed_5_tower_conv_2_conv2d_bn" + top: "mixed_5_tower_conv_2_conv2d_relu" +} +layer { + name: "mixed_5_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_4_chconcat" + top: "mixed_5_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + 
num_output: 160 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_5_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_1_conv_conv2d" + top: "mixed_5_tower_1_conv_conv2d_bn" +} +layer { + name: "mixed_5_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_5_tower_1_conv_conv2d_bn" + top: "mixed_5_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_5_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_5_tower_1_conv_conv2d_relu" + top: "mixed_5_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_5_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_1_conv_1_conv2d" + top: "mixed_5_tower_1_conv_1_conv2d_bn" +} +layer { + name: "mixed_5_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_5_tower_1_conv_1_conv2d_bn" + top: "mixed_5_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_5_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_5_tower_1_conv_1_conv2d_relu" + top: "mixed_5_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_5_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_1_conv_2_conv2d" + top: "mixed_5_tower_1_conv_2_conv2d_bn" +} +layer { + name: "mixed_5_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_5_tower_1_conv_2_conv2d_bn" + top: "mixed_5_tower_1_conv_2_conv2d_relu" +} +layer { + name: "mixed_5_tower_1_conv_3_conv2d" + type: "Convolution" + bottom: "mixed_5_tower_1_conv_2_conv2d_relu" + top: "mixed_5_tower_1_conv_3_conv2d" + param { + 
lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_5_tower_1_conv_3_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_1_conv_3_conv2d" + top: "mixed_5_tower_1_conv_3_conv2d_bn" +} +layer { + name: "mixed_5_tower_1_conv_3_relu" + type: "ReLU" + bottom: "mixed_5_tower_1_conv_3_conv2d_bn" + top: "mixed_5_tower_1_conv_3_conv2d_relu" +} +layer { + name: "mixed_5_tower_1_conv_4_conv2d" + type: "Convolution" + bottom: "mixed_5_tower_1_conv_3_conv2d_relu" + top: "mixed_5_tower_1_conv_4_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_5_tower_1_conv_4_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_1_conv_4_conv2d" + top: "mixed_5_tower_1_conv_4_conv2d_bn" +} +layer { + name: "mixed_5_tower_1_conv_4_relu" + type: "ReLU" + bottom: "mixed_5_tower_1_conv_4_conv2d_bn" + top: "mixed_5_tower_1_conv_4_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_5_pool" + type: "Pooling" + bottom: "ch_concat_mixed_4_chconcat" + top: "AVE_pool_mixed_5_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_5_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_5_pool" + top: "mixed_5_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_5_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_5_tower_2_conv_conv2d" + top: "mixed_5_tower_2_conv_conv2d_bn" +} +layer { + name: "mixed_5_tower_2_conv_relu" + type: "ReLU" + bottom: 
"mixed_5_tower_2_conv_conv2d_bn" + top: "mixed_5_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_5_chconcat" + type: "Concat" + bottom: "mixed_5_conv_conv2d_relu" + bottom: "mixed_5_tower_conv_2_conv2d_relu" + bottom: "mixed_5_tower_1_conv_4_conv2d_relu" + bottom: "mixed_5_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_5_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_6_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_5_chconcat" + top: "mixed_6_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_6_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_conv_conv2d" + top: "mixed_6_conv_conv2d_bn" +} +layer { + name: "mixed_6_conv_relu" + type: "ReLU" + bottom: "mixed_6_conv_conv2d_bn" + top: "mixed_6_conv_conv2d_relu" +} +layer { + name: "mixed_6_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_5_chconcat" + top: "mixed_6_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_6_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_conv_conv2d" + top: "mixed_6_tower_conv_conv2d_bn" +} +layer { + name: "mixed_6_tower_conv_relu" + type: "ReLU" + bottom: "mixed_6_tower_conv_conv2d_bn" + top: "mixed_6_tower_conv_conv2d_relu" +} +layer { + name: "mixed_6_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_6_tower_conv_conv2d_relu" + top: "mixed_6_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: 
"mixed_6_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_conv_1_conv2d" + top: "mixed_6_tower_conv_1_conv2d_bn" +} +layer { + name: "mixed_6_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_6_tower_conv_1_conv2d_bn" + top: "mixed_6_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_6_tower_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_6_tower_conv_1_conv2d_relu" + top: "mixed_6_tower_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_6_tower_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_conv_2_conv2d" + top: "mixed_6_tower_conv_2_conv2d_bn" +} +layer { + name: "mixed_6_tower_conv_2_relu" + type: "ReLU" + bottom: "mixed_6_tower_conv_2_conv2d_bn" + top: "mixed_6_tower_conv_2_conv2d_relu" +} +layer { + name: "mixed_6_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_5_chconcat" + top: "mixed_6_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_6_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_1_conv_conv2d" + top: "mixed_6_tower_1_conv_conv2d_bn" +} +layer { + name: "mixed_6_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_6_tower_1_conv_conv2d_bn" + top: "mixed_6_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_6_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_6_tower_1_conv_conv2d_relu" + top: "mixed_6_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} 
+layer { + name: "mixed_6_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_1_conv_1_conv2d" + top: "mixed_6_tower_1_conv_1_conv2d_bn" +} +layer { + name: "mixed_6_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_6_tower_1_conv_1_conv2d_bn" + top: "mixed_6_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_6_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_6_tower_1_conv_1_conv2d_relu" + top: "mixed_6_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_6_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_1_conv_2_conv2d" + top: "mixed_6_tower_1_conv_2_conv2d_bn" +} +layer { + name: "mixed_6_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_6_tower_1_conv_2_conv2d_bn" + top: "mixed_6_tower_1_conv_2_conv2d_relu" +} +layer { + name: "mixed_6_tower_1_conv_3_conv2d" + type: "Convolution" + bottom: "mixed_6_tower_1_conv_2_conv2d_relu" + top: "mixed_6_tower_1_conv_3_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 160 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_6_tower_1_conv_3_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_1_conv_3_conv2d" + top: "mixed_6_tower_1_conv_3_conv2d_bn" +} +layer { + name: "mixed_6_tower_1_conv_3_relu" + type: "ReLU" + bottom: "mixed_6_tower_1_conv_3_conv2d_bn" + top: "mixed_6_tower_1_conv_3_conv2d_relu" +} +layer { + name: "mixed_6_tower_1_conv_4_conv2d" + type: "Convolution" + bottom: "mixed_6_tower_1_conv_3_conv2d_relu" + top: "mixed_6_tower_1_conv_4_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler 
{ + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_6_tower_1_conv_4_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_1_conv_4_conv2d" + top: "mixed_6_tower_1_conv_4_conv2d_bn" +} +layer { + name: "mixed_6_tower_1_conv_4_relu" + type: "ReLU" + bottom: "mixed_6_tower_1_conv_4_conv2d_bn" + top: "mixed_6_tower_1_conv_4_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_6_pool" + type: "Pooling" + bottom: "ch_concat_mixed_5_chconcat" + top: "AVE_pool_mixed_6_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_6_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_6_pool" + top: "mixed_6_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_6_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_6_tower_2_conv_conv2d" + top: "mixed_6_tower_2_conv_conv2d_bn" +} +layer { + name: "mixed_6_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_6_tower_2_conv_conv2d_bn" + top: "mixed_6_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_6_chconcat" + type: "Concat" + bottom: "mixed_6_conv_conv2d_relu" + bottom: "mixed_6_tower_conv_2_conv2d_relu" + bottom: "mixed_6_tower_1_conv_4_conv2d_relu" + bottom: "mixed_6_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_6_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_7_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_6_chconcat" + top: "mixed_7_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_7_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_conv_conv2d" + top: 
"mixed_7_conv_conv2d_bn" +} +layer { + name: "mixed_7_conv_relu" + type: "ReLU" + bottom: "mixed_7_conv_conv2d_bn" + top: "mixed_7_conv_conv2d_relu" +} +layer { + name: "mixed_7_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_6_chconcat" + top: "mixed_7_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_7_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_conv_conv2d" + top: "mixed_7_tower_conv_conv2d_bn" +} +layer { + name: "mixed_7_tower_conv_relu" + type: "ReLU" + bottom: "mixed_7_tower_conv_conv2d_bn" + top: "mixed_7_tower_conv_conv2d_relu" +} +layer { + name: "mixed_7_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_7_tower_conv_conv2d_relu" + top: "mixed_7_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_7_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_conv_1_conv2d" + top: "mixed_7_tower_conv_1_conv2d_bn" +} +layer { + name: "mixed_7_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_7_tower_conv_1_conv2d_bn" + top: "mixed_7_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_7_tower_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_7_tower_conv_1_conv2d_relu" + top: "mixed_7_tower_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_7_tower_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_conv_2_conv2d" + top: "mixed_7_tower_conv_2_conv2d_bn" +} 
+layer { + name: "mixed_7_tower_conv_2_relu" + type: "ReLU" + bottom: "mixed_7_tower_conv_2_conv2d_bn" + top: "mixed_7_tower_conv_2_conv2d_relu" +} +layer { + name: "mixed_7_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_6_chconcat" + top: "mixed_7_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_7_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_1_conv_conv2d" + top: "mixed_7_tower_1_conv_conv2d_bn" +} +layer { + name: "mixed_7_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_7_tower_1_conv_conv2d_bn" + top: "mixed_7_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_7_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_7_tower_1_conv_conv2d_relu" + top: "mixed_7_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_7_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_1_conv_1_conv2d" + top: "mixed_7_tower_1_conv_1_conv2d_bn" +} +layer { + name: "mixed_7_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_7_tower_1_conv_1_conv2d_bn" + top: "mixed_7_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_7_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_7_tower_1_conv_1_conv2d_relu" + top: "mixed_7_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_7_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_1_conv_2_conv2d" + top: 
"mixed_7_tower_1_conv_2_conv2d_bn" +} +layer { + name: "mixed_7_tower_1_conv_2_relu" + type: "ReLU" + bottom: "mixed_7_tower_1_conv_2_conv2d_bn" + top: "mixed_7_tower_1_conv_2_conv2d_relu" +} +layer { + name: "mixed_7_tower_1_conv_3_conv2d" + type: "Convolution" + bottom: "mixed_7_tower_1_conv_2_conv2d_relu" + top: "mixed_7_tower_1_conv_3_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_7_tower_1_conv_3_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_1_conv_3_conv2d" + top: "mixed_7_tower_1_conv_3_conv2d_bn" +} +layer { + name: "mixed_7_tower_1_conv_3_relu" + type: "ReLU" + bottom: "mixed_7_tower_1_conv_3_conv2d_bn" + top: "mixed_7_tower_1_conv_3_conv2d_relu" +} +layer { + name: "mixed_7_tower_1_conv_4_conv2d" + type: "Convolution" + bottom: "mixed_7_tower_1_conv_3_conv2d_relu" + top: "mixed_7_tower_1_conv_4_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_7_tower_1_conv_4_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_1_conv_4_conv2d" + top: "mixed_7_tower_1_conv_4_conv2d_bn" +} +layer { + name: "mixed_7_tower_1_conv_4_relu" + type: "ReLU" + bottom: "mixed_7_tower_1_conv_4_conv2d_bn" + top: "mixed_7_tower_1_conv_4_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_7_pool" + type: "Pooling" + bottom: "ch_concat_mixed_6_chconcat" + top: "AVE_pool_mixed_7_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_7_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_7_pool" + top: "mixed_7_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param 
{ + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_7_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_7_tower_2_conv_conv2d" + top: "mixed_7_tower_2_conv_conv2d_bn" +} +layer { + name: "mixed_7_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_7_tower_2_conv_conv2d_bn" + top: "mixed_7_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_7_chconcat" + type: "Concat" + bottom: "mixed_7_conv_conv2d_relu" + bottom: "mixed_7_tower_conv_2_conv2d_relu" + bottom: "mixed_7_tower_1_conv_4_conv2d_relu" + bottom: "mixed_7_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_7_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_8_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_7_chconcat" + top: "mixed_8_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_8_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_8_tower_conv_conv2d" + top: "mixed_8_tower_conv_conv2d_bn" +} +layer { + name: "mixed_8_tower_conv_relu" + type: "ReLU" + bottom: "mixed_8_tower_conv_conv2d_bn" + top: "mixed_8_tower_conv_conv2d_relu" +} +layer { + name: "mixed_8_tower_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_8_tower_conv_conv2d_relu" + top: "mixed_8_tower_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 320 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_8_tower_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_8_tower_conv_1_conv2d" + top: "mixed_8_tower_conv_1_conv2d_bn" +} +layer { + name: "mixed_8_tower_conv_1_relu" + type: "ReLU" + bottom: "mixed_8_tower_conv_1_conv2d_bn" + top: 
"mixed_8_tower_conv_1_conv2d_relu" +} +layer { + name: "mixed_8_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_7_chconcat" + top: "mixed_8_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_8_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_8_tower_1_conv_conv2d" + top: "mixed_8_tower_1_conv_conv2d_bn" +} +layer { + name: "mixed_8_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_8_tower_1_conv_conv2d_bn" + top: "mixed_8_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_8_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_8_tower_1_conv_conv2d_relu" + top: "mixed_8_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 3 + kernel_h: 1 + kernel_w: 7 + } +} +layer { + name: "mixed_8_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_8_tower_1_conv_1_conv2d" + top: "mixed_8_tower_1_conv_1_conv2d_bn" +} +layer { + name: "mixed_8_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_8_tower_1_conv_1_conv2d_bn" + top: "mixed_8_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_8_tower_1_conv_2_conv2d" + type: "Convolution" + bottom: "mixed_8_tower_1_conv_1_conv2d_relu" + top: "mixed_8_tower_1_conv_2_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 3 + pad_w: 0 + kernel_h: 7 + kernel_w: 1 + } +} +layer { + name: "mixed_8_tower_1_conv_2_batchnorm" + type: "BatchNorm" + bottom: "mixed_8_tower_1_conv_2_conv2d" + top: "mixed_8_tower_1_conv_2_conv2d_bn" +} +layer { + name: "mixed_8_tower_1_conv_2_relu" + type: "ReLU" + bottom: 
"mixed_8_tower_1_conv_2_conv2d_bn" + top: "mixed_8_tower_1_conv_2_conv2d_relu" +} +layer { + name: "mixed_8_tower_1_conv_3_conv2d" + type: "Convolution" + bottom: "mixed_8_tower_1_conv_2_conv2d_relu" + top: "mixed_8_tower_1_conv_3_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 3 + stride: 2 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_8_tower_1_conv_3_batchnorm" + type: "BatchNorm" + bottom: "mixed_8_tower_1_conv_3_conv2d" + top: "mixed_8_tower_1_conv_3_conv2d_bn" +} +layer { + name: "mixed_8_tower_1_conv_3_relu" + type: "ReLU" + bottom: "mixed_8_tower_1_conv_3_conv2d_bn" + top: "mixed_8_tower_1_conv_3_conv2d_relu" +} +layer { + name: "MAX_pool_mixed_8_pool" + type: "Pooling" + bottom: "ch_concat_mixed_7_chconcat" + top: "MAX_pool_mixed_8_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + pad: 0 + } +} +layer { + name: "ch_concat_mixed_8_chconcat" + type: "Concat" + bottom: "mixed_8_tower_conv_1_conv2d_relu" + bottom: "mixed_8_tower_1_conv_3_conv2d_relu" + bottom: "MAX_pool_mixed_8_pool" + top: "ch_concat_mixed_8_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_9_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_8_chconcat" + top: "mixed_9_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 320 + bias_term: false + pad: 0 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + kernel_h: 1 + kernel_w: 1 + } +} +layer { + name: "mixed_9_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_conv_conv2d" + top: "mixed_9_conv_conv2d_bn" +} +layer { + name: "mixed_9_conv_relu" + type: "ReLU" + bottom: "mixed_9_conv_conv2d_bn" + top: "mixed_9_conv_conv2d_relu" +} +layer { + name: "mixed_9_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_8_chconcat" + top: "mixed_9_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 
+ } + convolution_param { + num_output: 384 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_9_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_conv_conv2d" + top: "mixed_9_tower_conv_conv2d_bn" +} +layer { + name: "mixed_9_tower_conv_relu" + type: "ReLU" + bottom: "mixed_9_tower_conv_conv2d_bn" + top: "mixed_9_tower_conv_conv2d_relu" +} +layer { + name: "mixed_9_tower_mixed_conv_conv2d" + type: "Convolution" + bottom: "mixed_9_tower_conv_conv2d_relu" + top: "mixed_9_tower_mixed_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 1 + kernel_h: 1 + kernel_w: 3 + } +} +layer { + name: "mixed_9_tower_mixed_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_mixed_conv_conv2d" + top: "mixed_9_tower_mixed_conv_conv2d_bn" +} +layer { + name: "mixed_9_tower_mixed_conv_relu" + type: "ReLU" + bottom: "mixed_9_tower_mixed_conv_conv2d_bn" + top: "mixed_9_tower_mixed_conv_conv2d_relu" +} +layer { + name: "mixed_9_tower_mixed_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_9_tower_conv_conv2d_relu" + top: "mixed_9_tower_mixed_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 1 + pad_w: 0 + kernel_h: 3 + kernel_w: 1 + } +} +layer { + name: "mixed_9_tower_mixed_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_mixed_conv_1_conv2d" + top: "mixed_9_tower_mixed_conv_1_conv2d_bn" +} +layer { + name: "mixed_9_tower_mixed_conv_1_relu" + type: "ReLU" + bottom: "mixed_9_tower_mixed_conv_1_conv2d_bn" + top: "mixed_9_tower_mixed_conv_1_conv2d_relu" +} +layer { + name: "mixed_9_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_8_chconcat" + 
top: "mixed_9_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 448 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_9_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_1_conv_conv2d" + top: "mixed_9_tower_1_conv_conv2d_bn" +} +layer { + name: "mixed_9_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_9_tower_1_conv_conv2d_bn" + top: "mixed_9_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_9_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_9_tower_1_conv_conv2d_relu" + top: "mixed_9_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_9_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_1_conv_1_conv2d" + top: "mixed_9_tower_1_conv_1_conv2d_bn" +} +layer { + name: "mixed_9_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_9_tower_1_conv_1_conv2d_bn" + top: "mixed_9_tower_1_conv_1_conv2d_relu" +} +layer { + name: "mixed_9_tower_1_mixed_conv_conv2d" + type: "Convolution" + bottom: "mixed_9_tower_1_conv_1_conv2d_relu" + top: "mixed_9_tower_1_mixed_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 1 + kernel_h: 1 + kernel_w: 3 + } +} +layer { + name: "mixed_9_tower_1_mixed_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_1_mixed_conv_conv2d" + top: "mixed_9_tower_1_mixed_conv_conv2d_bn" +} +layer { + name: "mixed_9_tower_1_mixed_conv_relu" + type: "ReLU" + bottom: "mixed_9_tower_1_mixed_conv_conv2d_bn" + top: "mixed_9_tower_1_mixed_conv_conv2d_relu" +} +layer { + name: "mixed_9_tower_1_mixed_conv_1_conv2d" + 
type: "Convolution" + bottom: "mixed_9_tower_1_conv_1_conv2d_relu" + top: "mixed_9_tower_1_mixed_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 1 + pad_w: 0 + kernel_h: 3 + kernel_w: 1 + } +} +layer { + name: "mixed_9_tower_1_mixed_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_1_mixed_conv_1_conv2d" + top: "mixed_9_tower_1_mixed_conv_1_conv2d_bn" +} +layer { + name: "mixed_9_tower_1_mixed_conv_1_relu" + type: "ReLU" + bottom: "mixed_9_tower_1_mixed_conv_1_conv2d_bn" + top: "mixed_9_tower_1_mixed_conv_1_conv2d_relu" +} +layer { + name: "AVE_pool_mixed_9_pool" + type: "Pooling" + bottom: "ch_concat_mixed_8_chconcat" + top: "AVE_pool_mixed_9_pool" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_9_tower_2_conv_conv2d" + type: "Convolution" + bottom: "AVE_pool_mixed_9_pool" + top: "mixed_9_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_9_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_9_tower_2_conv_conv2d" + top: "mixed_9_tower_2_conv_conv2d_bn" +} +layer { + name: "mixed_9_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_9_tower_2_conv_conv2d_bn" + top: "mixed_9_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_9_chconcat" + type: "Concat" + bottom: "mixed_9_conv_conv2d_relu" + bottom: "mixed_9_tower_mixed_conv_conv2d_relu" + bottom: "mixed_9_tower_mixed_conv_1_conv2d_relu" + bottom: "mixed_9_tower_1_mixed_conv_conv2d_relu" + bottom: "mixed_9_tower_1_mixed_conv_1_conv2d_relu" + bottom: "mixed_9_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_9_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "mixed_10_conv_conv2d" + 
type: "Convolution" + bottom: "ch_concat_mixed_9_chconcat" + top: "mixed_10_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 320 + bias_term: false + pad: 0 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + kernel_h: 1 + kernel_w: 1 + } +} +layer { + name: "mixed_10_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_conv_conv2d" + top: "mixed_10_conv_conv2d_bn" +} +layer { + name: "mixed_10_conv_relu" + type: "ReLU" + bottom: "mixed_10_conv_conv2d_bn" + top: "mixed_10_conv_conv2d_relu" +} +layer { + name: "mixed_10_tower_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_9_chconcat" + top: "mixed_10_tower_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_10_tower_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_conv_conv2d" + top: "mixed_10_tower_conv_conv2d_bn" +} +layer { + name: "mixed_10_tower_conv_relu" + type: "ReLU" + bottom: "mixed_10_tower_conv_conv2d_bn" + top: "mixed_10_tower_conv_conv2d_relu" +} +layer { + name: "mixed_10_tower_mixed_conv_conv2d" + type: "Convolution" + bottom: "mixed_10_tower_conv_conv2d_relu" + top: "mixed_10_tower_mixed_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 1 + kernel_h: 1 + kernel_w: 3 + } +} +layer { + name: "mixed_10_tower_mixed_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_mixed_conv_conv2d" + top: "mixed_10_tower_mixed_conv_conv2d_bn" +} +layer { + name: "mixed_10_tower_mixed_conv_relu" + type: "ReLU" + bottom: "mixed_10_tower_mixed_conv_conv2d_bn" + top: "mixed_10_tower_mixed_conv_conv2d_relu" +} +layer { + name: "mixed_10_tower_mixed_conv_1_conv2d" + type: "Convolution" 
+ bottom: "mixed_10_tower_conv_conv2d_relu" + top: "mixed_10_tower_mixed_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 1 + pad_w: 0 + kernel_h: 3 + kernel_w: 1 + } +} +layer { + name: "mixed_10_tower_mixed_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_mixed_conv_1_conv2d" + top: "mixed_10_tower_mixed_conv_1_conv2d_bn" +} +layer { + name: "mixed_10_tower_mixed_conv_1_relu" + type: "ReLU" + bottom: "mixed_10_tower_mixed_conv_1_conv2d_bn" + top: "mixed_10_tower_mixed_conv_1_conv2d_relu" +} +layer { + name: "mixed_10_tower_1_conv_conv2d" + type: "Convolution" + bottom: "ch_concat_mixed_9_chconcat" + top: "mixed_10_tower_1_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 448 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_10_tower_1_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_1_conv_conv2d" + top: "mixed_10_tower_1_conv_conv2d_bn" +} +layer { + name: "mixed_10_tower_1_conv_relu" + type: "ReLU" + bottom: "mixed_10_tower_1_conv_conv2d_bn" + top: "mixed_10_tower_1_conv_conv2d_relu" +} +layer { + name: "mixed_10_tower_1_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_10_tower_1_conv_conv2d_relu" + top: "mixed_10_tower_1_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_10_tower_1_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_1_conv_1_conv2d" + top: "mixed_10_tower_1_conv_1_conv2d_bn" +} +layer { + name: "mixed_10_tower_1_conv_1_relu" + type: "ReLU" + bottom: "mixed_10_tower_1_conv_1_conv2d_bn" + top: "mixed_10_tower_1_conv_1_conv2d_relu" +} 
+layer { + name: "mixed_10_tower_1_mixed_conv_conv2d" + type: "Convolution" + bottom: "mixed_10_tower_1_conv_1_conv2d_relu" + top: "mixed_10_tower_1_mixed_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 0 + pad_w: 1 + kernel_h: 1 + kernel_w: 3 + } +} +layer { + name: "mixed_10_tower_1_mixed_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_1_mixed_conv_conv2d" + top: "mixed_10_tower_1_mixed_conv_conv2d_bn" +} +layer { + name: "mixed_10_tower_1_mixed_conv_relu" + type: "ReLU" + bottom: "mixed_10_tower_1_mixed_conv_conv2d_bn" + top: "mixed_10_tower_1_mixed_conv_conv2d_relu" +} +layer { + name: "mixed_10_tower_1_mixed_conv_1_conv2d" + type: "Convolution" + bottom: "mixed_10_tower_1_conv_1_conv2d_relu" + top: "mixed_10_tower_1_mixed_conv_1_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 384 + bias_term: false + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + pad_h: 1 + pad_w: 0 + kernel_h: 3 + kernel_w: 1 + } +} +layer { + name: "mixed_10_tower_1_mixed_conv_1_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_1_mixed_conv_1_conv2d" + top: "mixed_10_tower_1_mixed_conv_1_conv2d_bn" +} +layer { + name: "mixed_10_tower_1_mixed_conv_1_relu" + type: "ReLU" + bottom: "mixed_10_tower_1_mixed_conv_1_conv2d_bn" + top: "mixed_10_tower_1_mixed_conv_1_conv2d_relu" +} +layer { + name: "MAX_pool_mixed_10_pool" + type: "Pooling" + bottom: "ch_concat_mixed_9_chconcat" + top: "MAX_pool_mixed_10_pool" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + name: "mixed_10_tower_2_conv_conv2d" + type: "Convolution" + bottom: "MAX_pool_mixed_10_pool" + top: "mixed_10_tower_2_conv_conv2d" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + convolution_param { + num_output: 192 + bias_term: false + pad: 0 + kernel_size: 1 + stride: 1 + 
weight_filler { + type: "gaussian" + std: 0.01 + } + } +} +layer { + name: "mixed_10_tower_2_conv_batchnorm" + type: "BatchNorm" + bottom: "mixed_10_tower_2_conv_conv2d" + top: "mixed_10_tower_2_conv_conv2d_bn" +} +layer { + name: "mixed_10_tower_2_conv_relu" + type: "ReLU" + bottom: "mixed_10_tower_2_conv_conv2d_bn" + top: "mixed_10_tower_2_conv_conv2d_relu" +} +layer { + name: "ch_concat_mixed_10_chconcat" + type: "Concat" + bottom: "mixed_10_conv_conv2d_relu" + bottom: "mixed_10_tower_mixed_conv_conv2d_relu" + bottom: "mixed_10_tower_mixed_conv_1_conv2d_relu" + bottom: "mixed_10_tower_1_mixed_conv_conv2d_relu" + bottom: "mixed_10_tower_1_mixed_conv_1_conv2d_relu" + bottom: "mixed_10_tower_2_conv_conv2d_relu" + top: "ch_concat_mixed_10_chconcat" + concat_param { + axis: 1 + } +} +layer { + name: "global_pool" + type: "Pooling" + bottom: "ch_concat_mixed_10_chconcat" + top: "global_pool" + pooling_param { + pool: AVE + kernel_size: 8 + stride: 1 + pad: 0 + } +} +layer { + name: "drop" + type: "Dropout" + bottom: "global_pool" + top: "global_pool" + dropout_param { + dropout_ratio: 0.8 + } +} +layer { + name: "flatten" + type: "Flatten" + bottom: "global_pool" + top: "flatten" +} +layer { + name: "fc1" + type: "InnerProduct" + bottom: "flatten" + top: "fc1" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + inner_product_param { + num_output: 1000 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "loss" + type: "SoftmaxWithLoss" + bottom: "fc1" + bottom: "label" + top: "loss" +} +layer { + name: "loss3/top-1" + type: "Accuracy" + bottom: "fc1" + bottom: "label" + top: "loss3/top-1" + include { + phase: TEST + } +} +layer { + name: "loss3/top-5" + type: "Accuracy" + bottom: "fc1" + bottom: "label" + top: "loss3/top-5" + include { + phase: TEST + } + accuracy_param { + top_k: 5 + } +} \ No newline at end of file From 9e6c1ba76a8c9fba307d1e36952158bdebac5253 
Mon Sep 17 00:00:00 2001 From: "Shen, Haihao" Date: Thu, 8 Jun 2017 08:35:18 +0800 Subject: [PATCH 05/34] Align MKL-DNN external latest commit id: a7e17b753c622906f8bdc78f8510e023fc10daaf (2017-06-08) --- mkldnn.commit | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkldnn.commit b/mkldnn.commit index 1e2d067fb..3cb3ee7f7 100644 --- a/mkldnn.commit +++ b/mkldnn.commit @@ -1 +1 @@ -ecf1883a94239a19d442356d32e1076a15b88e7a +a7e17b753c622906f8bdc78f8510e023fc10daaf From 93a9e4d1ad60f9989a70db88151eefe9fdc937e9 Mon Sep 17 00:00:00 2001 From: Deng Daisy Date: Thu, 8 Jun 2017 16:04:21 +0800 Subject: [PATCH 06/34] fix icl-40 Test phase at the end of training was removed. Remove googlenet_v3 topology from models --- .../googlenet_v3/mkl/lmdb_solver.prototxt | 16 - .../googlenet_v3/mkl/lmdb_train_val.prototxt | 3877 ----------------- .../googlenet_v3/mkl/nodata_solver.prototxt | 16 - .../mkl/nodata_train_val.prototxt | 3860 ---------------- .../googlenet_v3/mkldnn/lmdb_solver.prototxt | 16 - .../mkldnn/lmdb_train_val.prototxt | 3594 --------------- .../mkldnn/nodata_solver.prototxt | 16 - .../mkldnn/nodata_train_val.prototxt | 3577 --------------- src/caffe/solver.cpp | 2 - 9 files changed, 14974 deletions(-) delete mode 100644 models/intel_optimized_models/googlenet_v3/mkl/lmdb_solver.prototxt delete mode 100644 models/intel_optimized_models/googlenet_v3/mkl/lmdb_train_val.prototxt delete mode 100644 models/intel_optimized_models/googlenet_v3/mkl/nodata_solver.prototxt delete mode 100644 models/intel_optimized_models/googlenet_v3/mkl/nodata_train_val.prototxt delete mode 100644 models/intel_optimized_models/googlenet_v3/mkldnn/lmdb_solver.prototxt delete mode 100644 models/intel_optimized_models/googlenet_v3/mkldnn/lmdb_train_val.prototxt delete mode 100644 models/intel_optimized_models/googlenet_v3/mkldnn/nodata_solver.prototxt delete mode 100644 models/intel_optimized_models/googlenet_v3/mkldnn/nodata_train_val.prototxt diff --git 
a/models/intel_optimized_models/googlenet_v3/mkl/lmdb_solver.prototxt b/models/intel_optimized_models/googlenet_v3/mkl/lmdb_solver.prototxt deleted file mode 100644 index 2f9685402..000000000 --- a/models/intel_optimized_models/googlenet_v3/mkl/lmdb_solver.prototxt +++ /dev/null @@ -1,16 +0,0 @@ -net: "models/intel_optimized_models/googlenet_v3/mkl/lmdb_train_val.prototxt" -test_iter: 1000 -test_interval: 10000 -test_initialization: false -display: 40 -average_loss: 40 -base_lr: 0.045 -lr_policy: "step" -stepsize: 6400 -gamma: 0.96 -max_iter: 1200000 -momentum: 0.9 -weight_decay: 0.0002 -snapshot: 50000 -snapshot_prefix: "models/intel_optimized_models/googlenet_v3/mkl/googlenet_v3_mkl_lmdb" -solver_mode: CPU diff --git a/models/intel_optimized_models/googlenet_v3/mkl/lmdb_train_val.prototxt b/models/intel_optimized_models/googlenet_v3/mkl/lmdb_train_val.prototxt deleted file mode 100644 index d6d99f721..000000000 --- a/models/intel_optimized_models/googlenet_v3/mkl/lmdb_train_val.prototxt +++ /dev/null @@ -1,3877 +0,0 @@ -name: "InceptionV3" - -layer { - top: "data" - top: "label" - name: "data" - type: "Data" - data_param { - source: "/data/LMDB_300px/ilsvrc12_train_lmdb" - batch_size: 22 - backend: LMDB -# shuffle: true - } - include { - phase: TRAIN - } - transform_param { - mirror: true - crop_size: 299 -# mean_file: "data/ilsvrc12/imagenet_mean.binaryproto" - mean_value: 104 - mean_value: 117 - mean_value: 123 - } -} -### Validation Set -layer { - top: "data" - top: "label" - name: "data" - type: "Data" - data_param { - source: "/data/LMDB_300px/ilsvrc12_val_lmdb" - batch_size: 50 - backend: LMDB - } - include { - phase: TEST - } - transform_param { - mirror: false - crop_size: 299 -# mean_file: "data/ilsvrc12/imagenet_mean.binaryproto" - mean_value: 104 - mean_value: 117 - mean_value: 123 - } -} -#-------------------- - - -layer { - name: "conv_conv2d" - type: "Convolution" - bottom: "data" - top: "conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - 
convolution_param { - num_output: 32 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 2 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "conv_batchnorm" - type: "BatchNorm" - bottom: "conv_conv2d" - top: "conv_conv2d_bn" - - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "conv_relu" - type: "ReLU" - bottom: "conv_conv2d_bn" - top: "conv_conv2d_relu" -} -layer { - name: "conv_1_1_conv2d" - type: "Convolution" - bottom: "conv_conv2d_relu" - top: "conv_1_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 32 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "conv_1_1_batchnorm" - type: "BatchNorm" - bottom: "conv_1_1_conv2d" - top: "conv_1_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "conv_1_1_relu" - type: "ReLU" - bottom: "conv_1_1_conv2d_bn" - top: "conv_1_1_conv2d_relu" -} -layer { - name: "conv_2_2_conv2d" - type: "Convolution" - bottom: "conv_1_1_conv2d_relu" - top: "conv_2_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "conv_2_2_batchnorm" - type: "BatchNorm" - bottom: "conv_2_2_conv2d" - top: "conv_2_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "conv_2_2_relu" - type: "ReLU" - bottom: "conv_2_2_conv2d_bn" - top: "conv_2_2_conv2d_relu" -} -layer { - name: "pool" - type: "Pooling" - bottom: "conv_2_2_conv2d_relu" - top: "pool" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - pad: 0 - } -} -layer { - name: "conv_3_3_conv2d" - type: "Convolution" - bottom: "pool" - top: "conv_3_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 80 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - 
weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "conv_3_3_batchnorm" - type: "BatchNorm" - bottom: "conv_3_3_conv2d" - top: "conv_3_3_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "conv_3_3_relu" - type: "ReLU" - bottom: "conv_3_3_conv2d_bn" - top: "conv_3_3_conv2d_relu" -} -layer { - name: "conv_4_4_conv2d" - type: "Convolution" - bottom: "conv_3_3_conv2d_relu" - top: "conv_4_4_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "conv_4_4_batchnorm" - type: "BatchNorm" - bottom: "conv_4_4_conv2d" - top: "conv_4_4_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "conv_4_4_relu" - type: "ReLU" - bottom: "conv_4_4_conv2d_bn" - top: "conv_4_4_conv2d_relu" -} -layer { - name: "pool1" - type: "Pooling" - bottom: "conv_4_4_conv2d_relu" - top: "pool1" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - pad: 0 - } -} -layer { - name: "mixed_conv_conv2d" - type: "Convolution" - bottom: "pool1" - top: "mixed_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_conv_conv2d" - top: "mixed_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_conv_relu" - type: "ReLU" - bottom: "mixed_conv_conv2d_bn" - top: "mixed_conv_conv2d_relu" -} -layer { - name: "mixed_tower_conv_conv2d" - type: "Convolution" - bottom: "pool1" - top: "mixed_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 48 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} 
-layer { - name: "mixed_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_tower_conv_conv2d" - top: "mixed_tower_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_tower_conv_relu" - type: "ReLU" - bottom: "mixed_tower_conv_conv2d_bn" - top: "mixed_tower_conv_conv2d_relu" -} -layer { - name: "mixed_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_tower_conv_conv2d_relu" - top: "mixed_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_tower_conv_1_conv2d" - top: "mixed_tower_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_tower_conv_1_conv2d_bn" - top: "mixed_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_tower_1_conv_conv2d" - type: "Convolution" - bottom: "pool1" - top: "mixed_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_tower_1_conv_conv2d" - top: "mixed_tower_1_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_tower_1_conv_conv2d_bn" - top: "mixed_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_tower_1_conv_conv2d_relu" - top: "mixed_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - 
} -} -layer { - name: "mixed_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_tower_1_conv_1_conv2d" - top: "mixed_tower_1_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_tower_1_conv_1_conv2d_bn" - top: "mixed_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_tower_1_conv_1_conv2d_relu" - top: "mixed_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_tower_1_conv_2_conv2d" - top: "mixed_tower_1_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_tower_1_conv_2_conv2d_bn" - top: "mixed_tower_1_conv_2_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_pool" - type: "Pooling" - bottom: "pool1" - top: "AVE_pool_mixed_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_pool" - top: "mixed_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 32 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_tower_2_conv_conv2d" - top: "mixed_tower_2_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_tower_2_conv_conv2d_bn" - top: "mixed_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_chconcat" - type: "Concat" - bottom: "mixed_conv_conv2d_relu" - 
bottom: "mixed_tower_conv_1_conv2d_relu" - bottom: "mixed_tower_1_conv_2_conv2d_relu" - bottom: "mixed_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_chconcat" - top: "mixed_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_conv_conv2d" - top: "mixed_1_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_1_conv_relu" - type: "ReLU" - bottom: "mixed_1_conv_conv2d_bn" - top: "mixed_1_conv_conv2d_relu" -} -layer { - name: "mixed_1_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_chconcat" - top: "mixed_1_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 48 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_conv_conv2d" - top: "mixed_1_tower_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_1_tower_conv_relu" - type: "ReLU" - bottom: "mixed_1_tower_conv_conv2d_bn" - top: "mixed_1_tower_conv_conv2d_relu" -} -layer { - name: "mixed_1_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_1_tower_conv_conv2d_relu" - top: "mixed_1_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_conv_1_conv2d" - top: "mixed_1_tower_conv_1_conv2d_bn" 
- batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_1_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_1_tower_conv_1_conv2d_bn" - top: "mixed_1_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_1_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_chconcat" - top: "mixed_1_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_1_conv_conv2d" - top: "mixed_1_tower_1_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_1_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_1_tower_1_conv_conv2d_bn" - top: "mixed_1_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_1_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_1_tower_1_conv_conv2d_relu" - top: "mixed_1_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_1_conv_1_conv2d" - top: "mixed_1_tower_1_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_1_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_1_tower_1_conv_1_conv2d_bn" - top: "mixed_1_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_1_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_1_tower_1_conv_1_conv2d_relu" - top: "mixed_1_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: 
"mixed_1_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_1_conv_2_conv2d" - top: "mixed_1_tower_1_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_1_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_1_tower_1_conv_2_conv2d_bn" - top: "mixed_1_tower_1_conv_2_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_1_pool" - type: "Pooling" - bottom: "ch_concat_mixed_chconcat" - top: "AVE_pool_mixed_1_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_1_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_1_pool" - top: "mixed_1_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_2_conv_conv2d" - top: "mixed_1_tower_2_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_1_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_1_tower_2_conv_conv2d_bn" - top: "mixed_1_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_1_chconcat" - type: "Concat" - bottom: "mixed_1_conv_conv2d_relu" - bottom: "mixed_1_tower_conv_1_conv2d_relu" - bottom: "mixed_1_tower_1_conv_2_conv2d_relu" - bottom: "mixed_1_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_1_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_2_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_1_chconcat" - top: "mixed_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_conv_conv2d" - top: "mixed_2_conv_conv2d_bn" - 
batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_2_conv_relu" - type: "ReLU" - bottom: "mixed_2_conv_conv2d_bn" - top: "mixed_2_conv_conv2d_relu" -} -layer { - name: "mixed_2_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_1_chconcat" - top: "mixed_2_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 48 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_tower_conv_conv2d" - top: "mixed_2_tower_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_2_tower_conv_relu" - type: "ReLU" - bottom: "mixed_2_tower_conv_conv2d_bn" - top: "mixed_2_tower_conv_conv2d_relu" -} -layer { - name: "mixed_2_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_2_tower_conv_conv2d_relu" - top: "mixed_2_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_tower_conv_1_conv2d" - top: "mixed_2_tower_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_2_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_2_tower_conv_1_conv2d_bn" - top: "mixed_2_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_2_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_1_chconcat" - top: "mixed_2_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_tower_1_conv_conv2d" - 
top: "mixed_2_tower_1_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_2_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_2_tower_1_conv_conv2d_bn" - top: "mixed_2_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_2_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_2_tower_1_conv_conv2d_relu" - top: "mixed_2_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_tower_1_conv_1_conv2d" - top: "mixed_2_tower_1_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_2_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_2_tower_1_conv_1_conv2d_bn" - top: "mixed_2_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_2_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_2_tower_1_conv_1_conv2d_relu" - top: "mixed_2_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_tower_1_conv_2_conv2d" - top: "mixed_2_tower_1_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_2_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_2_tower_1_conv_2_conv2d_bn" - top: "mixed_2_tower_1_conv_2_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_2_pool" - type: "Pooling" - bottom: "ch_concat_mixed_1_chconcat" - top: "AVE_pool_mixed_2_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_2_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_2_pool" - top: 
"mixed_2_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_tower_2_conv_conv2d" - top: "mixed_2_tower_2_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_2_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_2_tower_2_conv_conv2d_bn" - top: "mixed_2_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_2_chconcat" - type: "Concat" - bottom: "mixed_2_conv_conv2d_relu" - bottom: "mixed_2_tower_conv_1_conv2d_relu" - bottom: "mixed_2_tower_1_conv_2_conv2d_relu" - bottom: "mixed_2_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_2_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_3_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_2_chconcat" - top: "mixed_3_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 2 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_3_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_3_conv_conv2d" - top: "mixed_3_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_3_conv_relu" - type: "ReLU" - bottom: "mixed_3_conv_conv2d_bn" - top: "mixed_3_conv_conv2d_relu" -} -layer { - name: "mixed_3_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_2_chconcat" - top: "mixed_3_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_3_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_3_tower_conv_conv2d" - top: 
"mixed_3_tower_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_3_tower_conv_relu" - type: "ReLU" - bottom: "mixed_3_tower_conv_conv2d_bn" - top: "mixed_3_tower_conv_conv2d_relu" -} -layer { - name: "mixed_3_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_3_tower_conv_conv2d_relu" - top: "mixed_3_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_3_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_3_tower_conv_1_conv2d" - top: "mixed_3_tower_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_3_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_3_tower_conv_1_conv2d_bn" - top: "mixed_3_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_3_tower_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_3_tower_conv_1_conv2d_relu" - top: "mixed_3_tower_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 2 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_3_tower_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_3_tower_conv_2_conv2d" - top: "mixed_3_tower_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_3_tower_conv_2_relu" - type: "ReLU" - bottom: "mixed_3_tower_conv_2_conv2d_bn" - top: "mixed_3_tower_conv_2_conv2d_relu" -} -layer { - name: "max_pool_mixed_3_pool" - type: "Pooling" - bottom: "ch_concat_mixed_2_chconcat" - top: "max_pool_mixed_3_pool" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - pad: 0 - } -} -layer { - name: "ch_concat_mixed_3_chconcat" - type: "Concat" - bottom: "max_pool_mixed_3_pool" - bottom: "mixed_3_conv_conv2d_relu" - bottom: "mixed_3_tower_conv_2_conv2d_relu" - 
top: "ch_concat_mixed_3_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_4_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_3_chconcat" - top: "mixed_4_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_4_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_conv_conv2d" - top: "mixed_4_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_4_conv_relu" - type: "ReLU" - bottom: "mixed_4_conv_conv2d_bn" - top: "mixed_4_conv_conv2d_relu" -} -layer { - name: "mixed_4_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_3_chconcat" - top: "mixed_4_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_4_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_conv_conv2d" - top: "mixed_4_tower_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_4_tower_conv_relu" - type: "ReLU" - bottom: "mixed_4_tower_conv_conv2d_bn" - top: "mixed_4_tower_conv_conv2d_relu" -} -layer { - name: "mixed_4_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_4_tower_conv_conv2d_relu" - top: "mixed_4_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_4_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_conv_1_conv2d" - top: "mixed_4_tower_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_4_tower_conv_1_relu" - type: 
"ReLU" - bottom: "mixed_4_tower_conv_1_conv2d_bn" - top: "mixed_4_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_4_tower_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_4_tower_conv_1_conv2d_relu" - top: "mixed_4_tower_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_4_tower_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_conv_2_conv2d" - top: "mixed_4_tower_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_4_tower_conv_2_relu" - type: "ReLU" - bottom: "mixed_4_tower_conv_2_conv2d_bn" - top: "mixed_4_tower_conv_2_conv2d_relu" -} -layer { - name: "mixed_4_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_3_chconcat" - top: "mixed_4_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_4_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_1_conv_conv2d" - top: "mixed_4_tower_1_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_4_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_4_tower_1_conv_conv2d_bn" - top: "mixed_4_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_4_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_4_tower_1_conv_conv2d_relu" - top: "mixed_4_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_4_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: 
"mixed_4_tower_1_conv_1_conv2d" - top: "mixed_4_tower_1_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_4_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_4_tower_1_conv_1_conv2d_bn" - top: "mixed_4_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_4_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_4_tower_1_conv_1_conv2d_relu" - top: "mixed_4_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_4_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_1_conv_2_conv2d" - top: "mixed_4_tower_1_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_4_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_4_tower_1_conv_2_conv2d_bn" - top: "mixed_4_tower_1_conv_2_conv2d_relu" -} -layer { - name: "mixed_4_tower_1_conv_3_conv2d" - type: "Convolution" - bottom: "mixed_4_tower_1_conv_2_conv2d_relu" - top: "mixed_4_tower_1_conv_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_4_tower_1_conv_3_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_1_conv_3_conv2d" - top: "mixed_4_tower_1_conv_3_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_4_tower_1_conv_3_relu" - type: "ReLU" - bottom: "mixed_4_tower_1_conv_3_conv2d_bn" - top: "mixed_4_tower_1_conv_3_conv2d_relu" -} -layer { - name: "mixed_4_tower_1_conv_4_conv2d" - type: "Convolution" - bottom: "mixed_4_tower_1_conv_3_conv2d_relu" - top: "mixed_4_tower_1_conv_4_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 
- bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_4_tower_1_conv_4_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_1_conv_4_conv2d" - top: "mixed_4_tower_1_conv_4_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_4_tower_1_conv_4_relu" - type: "ReLU" - bottom: "mixed_4_tower_1_conv_4_conv2d_bn" - top: "mixed_4_tower_1_conv_4_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_4_pool" - type: "Pooling" - bottom: "ch_concat_mixed_3_chconcat" - top: "AVE_pool_mixed_4_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_4_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_4_pool" - top: "mixed_4_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_4_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_2_conv_conv2d" - top: "mixed_4_tower_2_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_4_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_4_tower_2_conv_conv2d_bn" - top: "mixed_4_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_4_chconcat" - type: "Concat" - bottom: "mixed_4_conv_conv2d_relu" - bottom: "mixed_4_tower_conv_2_conv2d_relu" - bottom: "mixed_4_tower_1_conv_4_conv2d_relu" - bottom: "mixed_4_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_4_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_5_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_4_chconcat" - top: "mixed_5_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: 
"gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_5_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_conv_conv2d" - top: "mixed_5_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_5_conv_relu" - type: "ReLU" - bottom: "mixed_5_conv_conv2d_bn" - top: "mixed_5_conv_conv2d_relu" -} -layer { - name: "mixed_5_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_4_chconcat" - top: "mixed_5_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_5_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_conv_conv2d" - top: "mixed_5_tower_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_5_tower_conv_relu" - type: "ReLU" - bottom: "mixed_5_tower_conv_conv2d_bn" - top: "mixed_5_tower_conv_conv2d_relu" -} -layer { - name: "mixed_5_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_5_tower_conv_conv2d_relu" - top: "mixed_5_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_5_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_conv_1_conv2d" - top: "mixed_5_tower_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_5_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_5_tower_conv_1_conv2d_bn" - top: "mixed_5_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_5_tower_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_5_tower_conv_1_conv2d_relu" - top: "mixed_5_tower_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - 
weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_5_tower_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_conv_2_conv2d" - top: "mixed_5_tower_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_5_tower_conv_2_relu" - type: "ReLU" - bottom: "mixed_5_tower_conv_2_conv2d_bn" - top: "mixed_5_tower_conv_2_conv2d_relu" -} -layer { - name: "mixed_5_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_4_chconcat" - top: "mixed_5_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_5_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_1_conv_conv2d" - top: "mixed_5_tower_1_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_5_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_5_tower_1_conv_conv2d_bn" - top: "mixed_5_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_5_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_5_tower_1_conv_conv2d_relu" - top: "mixed_5_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_5_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_1_conv_1_conv2d" - top: "mixed_5_tower_1_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_5_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_5_tower_1_conv_1_conv2d_bn" - top: "mixed_5_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_5_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: 
"mixed_5_tower_1_conv_1_conv2d_relu" - top: "mixed_5_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_5_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_1_conv_2_conv2d" - top: "mixed_5_tower_1_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_5_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_5_tower_1_conv_2_conv2d_bn" - top: "mixed_5_tower_1_conv_2_conv2d_relu" -} -layer { - name: "mixed_5_tower_1_conv_3_conv2d" - type: "Convolution" - bottom: "mixed_5_tower_1_conv_2_conv2d_relu" - top: "mixed_5_tower_1_conv_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_5_tower_1_conv_3_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_1_conv_3_conv2d" - top: "mixed_5_tower_1_conv_3_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_5_tower_1_conv_3_relu" - type: "ReLU" - bottom: "mixed_5_tower_1_conv_3_conv2d_bn" - top: "mixed_5_tower_1_conv_3_conv2d_relu" -} -layer { - name: "mixed_5_tower_1_conv_4_conv2d" - type: "Convolution" - bottom: "mixed_5_tower_1_conv_3_conv2d_relu" - top: "mixed_5_tower_1_conv_4_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_5_tower_1_conv_4_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_1_conv_4_conv2d" - top: "mixed_5_tower_1_conv_4_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - 
name: "mixed_5_tower_1_conv_4_relu" - type: "ReLU" - bottom: "mixed_5_tower_1_conv_4_conv2d_bn" - top: "mixed_5_tower_1_conv_4_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_5_pool" - type: "Pooling" - bottom: "ch_concat_mixed_4_chconcat" - top: "AVE_pool_mixed_5_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_5_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_5_pool" - top: "mixed_5_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_5_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_2_conv_conv2d" - top: "mixed_5_tower_2_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_5_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_5_tower_2_conv_conv2d_bn" - top: "mixed_5_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_5_chconcat" - type: "Concat" - bottom: "mixed_5_conv_conv2d_relu" - bottom: "mixed_5_tower_conv_2_conv2d_relu" - bottom: "mixed_5_tower_1_conv_4_conv2d_relu" - bottom: "mixed_5_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_5_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_6_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_5_chconcat" - top: "mixed_6_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_6_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_conv_conv2d" - top: "mixed_6_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_6_conv_relu" - type: "ReLU" - bottom: "mixed_6_conv_conv2d_bn" - top: "mixed_6_conv_conv2d_relu" -} -layer { - name: 
"mixed_6_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_5_chconcat" - top: "mixed_6_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_6_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_conv_conv2d" - top: "mixed_6_tower_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_6_tower_conv_relu" - type: "ReLU" - bottom: "mixed_6_tower_conv_conv2d_bn" - top: "mixed_6_tower_conv_conv2d_relu" -} -layer { - name: "mixed_6_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_conv_conv2d_relu" - top: "mixed_6_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_6_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_conv_1_conv2d" - top: "mixed_6_tower_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_6_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_6_tower_conv_1_conv2d_bn" - top: "mixed_6_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_6_tower_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_conv_1_conv2d_relu" - top: "mixed_6_tower_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_6_tower_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_conv_2_conv2d" - top: "mixed_6_tower_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: 
"mixed_6_tower_conv_2_relu" - type: "ReLU" - bottom: "mixed_6_tower_conv_2_conv2d_bn" - top: "mixed_6_tower_conv_2_conv2d_relu" -} -layer { - name: "mixed_6_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_5_chconcat" - top: "mixed_6_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_6_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_1_conv_conv2d" - top: "mixed_6_tower_1_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_6_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_6_tower_1_conv_conv2d_bn" - top: "mixed_6_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_6_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_1_conv_conv2d_relu" - top: "mixed_6_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_6_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_1_conv_1_conv2d" - top: "mixed_6_tower_1_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_6_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_6_tower_1_conv_1_conv2d_bn" - top: "mixed_6_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_6_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_1_conv_1_conv2d_relu" - top: "mixed_6_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_6_tower_1_conv_2_batchnorm" 
- type: "BatchNorm" - bottom: "mixed_6_tower_1_conv_2_conv2d" - top: "mixed_6_tower_1_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_6_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_6_tower_1_conv_2_conv2d_bn" - top: "mixed_6_tower_1_conv_2_conv2d_relu" -} -layer { - name: "mixed_6_tower_1_conv_3_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_1_conv_2_conv2d_relu" - top: "mixed_6_tower_1_conv_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_6_tower_1_conv_3_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_1_conv_3_conv2d" - top: "mixed_6_tower_1_conv_3_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_6_tower_1_conv_3_relu" - type: "ReLU" - bottom: "mixed_6_tower_1_conv_3_conv2d_bn" - top: "mixed_6_tower_1_conv_3_conv2d_relu" -} -layer { - name: "mixed_6_tower_1_conv_4_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_1_conv_3_conv2d_relu" - top: "mixed_6_tower_1_conv_4_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_6_tower_1_conv_4_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_1_conv_4_conv2d" - top: "mixed_6_tower_1_conv_4_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_6_tower_1_conv_4_relu" - type: "ReLU" - bottom: "mixed_6_tower_1_conv_4_conv2d_bn" - top: "mixed_6_tower_1_conv_4_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_6_pool" - type: "Pooling" - bottom: "ch_concat_mixed_5_chconcat" - top: "AVE_pool_mixed_6_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { 
- name: "mixed_6_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_6_pool" - top: "mixed_6_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_6_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_2_conv_conv2d" - top: "mixed_6_tower_2_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_6_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_6_tower_2_conv_conv2d_bn" - top: "mixed_6_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_6_chconcat" - type: "Concat" - bottom: "mixed_6_conv_conv2d_relu" - bottom: "mixed_6_tower_conv_2_conv2d_relu" - bottom: "mixed_6_tower_1_conv_4_conv2d_relu" - bottom: "mixed_6_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_6_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_7_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_6_chconcat" - top: "mixed_7_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_7_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_conv_conv2d" - top: "mixed_7_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_7_conv_relu" - type: "ReLU" - bottom: "mixed_7_conv_conv2d_bn" - top: "mixed_7_conv_conv2d_relu" -} -layer { - name: "mixed_7_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_6_chconcat" - top: "mixed_7_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: 
"mixed_7_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_conv_conv2d" - top: "mixed_7_tower_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_7_tower_conv_relu" - type: "ReLU" - bottom: "mixed_7_tower_conv_conv2d_bn" - top: "mixed_7_tower_conv_conv2d_relu" -} -layer { - name: "mixed_7_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_7_tower_conv_conv2d_relu" - top: "mixed_7_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_7_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_conv_1_conv2d" - top: "mixed_7_tower_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_7_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_7_tower_conv_1_conv2d_bn" - top: "mixed_7_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_7_tower_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_7_tower_conv_1_conv2d_relu" - top: "mixed_7_tower_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_7_tower_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_conv_2_conv2d" - top: "mixed_7_tower_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_7_tower_conv_2_relu" - type: "ReLU" - bottom: "mixed_7_tower_conv_2_conv2d_bn" - top: "mixed_7_tower_conv_2_conv2d_relu" -} -layer { - name: "mixed_7_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_6_chconcat" - top: "mixed_7_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - 
bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_7_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_1_conv_conv2d" - top: "mixed_7_tower_1_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_7_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_7_tower_1_conv_conv2d_bn" - top: "mixed_7_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_7_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_7_tower_1_conv_conv2d_relu" - top: "mixed_7_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_7_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_1_conv_1_conv2d" - top: "mixed_7_tower_1_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_7_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_7_tower_1_conv_1_conv2d_bn" - top: "mixed_7_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_7_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_7_tower_1_conv_1_conv2d_relu" - top: "mixed_7_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_7_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_1_conv_2_conv2d" - top: "mixed_7_tower_1_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_7_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_7_tower_1_conv_2_conv2d_bn" - top: "mixed_7_tower_1_conv_2_conv2d_relu" -} -layer { - name: "mixed_7_tower_1_conv_3_conv2d" - type: 
"Convolution" - bottom: "mixed_7_tower_1_conv_2_conv2d_relu" - top: "mixed_7_tower_1_conv_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_7_tower_1_conv_3_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_1_conv_3_conv2d" - top: "mixed_7_tower_1_conv_3_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_7_tower_1_conv_3_relu" - type: "ReLU" - bottom: "mixed_7_tower_1_conv_3_conv2d_bn" - top: "mixed_7_tower_1_conv_3_conv2d_relu" -} -layer { - name: "mixed_7_tower_1_conv_4_conv2d" - type: "Convolution" - bottom: "mixed_7_tower_1_conv_3_conv2d_relu" - top: "mixed_7_tower_1_conv_4_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_7_tower_1_conv_4_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_1_conv_4_conv2d" - top: "mixed_7_tower_1_conv_4_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_7_tower_1_conv_4_relu" - type: "ReLU" - bottom: "mixed_7_tower_1_conv_4_conv2d_bn" - top: "mixed_7_tower_1_conv_4_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_7_pool" - type: "Pooling" - bottom: "ch_concat_mixed_6_chconcat" - top: "AVE_pool_mixed_7_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_7_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_7_pool" - top: "mixed_7_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: 
"mixed_7_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_2_conv_conv2d" - top: "mixed_7_tower_2_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_7_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_7_tower_2_conv_conv2d_bn" - top: "mixed_7_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_7_chconcat" - type: "Concat" - bottom: "mixed_7_conv_conv2d_relu" - bottom: "mixed_7_tower_conv_2_conv2d_relu" - bottom: "mixed_7_tower_1_conv_4_conv2d_relu" - bottom: "mixed_7_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_7_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_8_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_7_chconcat" - top: "mixed_8_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_8_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_conv_conv2d" - top: "mixed_8_tower_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_8_tower_conv_relu" - type: "ReLU" - bottom: "mixed_8_tower_conv_conv2d_bn" - top: "mixed_8_tower_conv_conv2d_relu" -} -layer { - name: "mixed_8_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_8_tower_conv_conv2d_relu" - top: "mixed_8_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 320 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 2 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_8_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_conv_1_conv2d" - top: "mixed_8_tower_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_8_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_8_tower_conv_1_conv2d_bn" - top: 
"mixed_8_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_8_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_7_chconcat" - top: "mixed_8_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_8_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_1_conv_conv2d" - top: "mixed_8_tower_1_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_8_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_8_tower_1_conv_conv2d_bn" - top: "mixed_8_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_8_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_8_tower_1_conv_conv2d_relu" - top: "mixed_8_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_8_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_1_conv_1_conv2d" - top: "mixed_8_tower_1_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_8_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_8_tower_1_conv_1_conv2d_bn" - top: "mixed_8_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_8_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_8_tower_1_conv_1_conv2d_relu" - top: "mixed_8_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_8_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_1_conv_2_conv2d" - top: 
"mixed_8_tower_1_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_8_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_8_tower_1_conv_2_conv2d_bn" - top: "mixed_8_tower_1_conv_2_conv2d_relu" -} -layer { - name: "mixed_8_tower_1_conv_3_conv2d" - type: "Convolution" - bottom: "mixed_8_tower_1_conv_2_conv2d_relu" - top: "mixed_8_tower_1_conv_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 2 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_8_tower_1_conv_3_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_1_conv_3_conv2d" - top: "mixed_8_tower_1_conv_3_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_8_tower_1_conv_3_relu" - type: "ReLU" - bottom: "mixed_8_tower_1_conv_3_conv2d_bn" - top: "mixed_8_tower_1_conv_3_conv2d_relu" -} -layer { - name: "MAX_pool_mixed_8_pool" - type: "Pooling" - bottom: "ch_concat_mixed_7_chconcat" - top: "MAX_pool_mixed_8_pool" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - pad: 0 - } -} -layer { - name: "ch_concat_mixed_8_chconcat" - type: "Concat" - bottom: "mixed_8_tower_conv_1_conv2d_relu" - bottom: "mixed_8_tower_1_conv_3_conv2d_relu" - bottom: "MAX_pool_mixed_8_pool" - top: "ch_concat_mixed_8_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_9_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_8_chconcat" - top: "mixed_9_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 320 - bias_term: false - pad: 0 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - kernel_h: 1 - kernel_w: 1 - } -} -layer { - name: "mixed_9_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_conv_conv2d" - top: "mixed_9_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_9_conv_relu" - type: "ReLU" - bottom: 
"mixed_9_conv_conv2d_bn" - top: "mixed_9_conv_conv2d_relu" -} -layer { - name: "mixed_9_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_8_chconcat" - top: "mixed_9_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_9_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_conv_conv2d" - top: "mixed_9_tower_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_9_tower_conv_relu" - type: "ReLU" - bottom: "mixed_9_tower_conv_conv2d_bn" - top: "mixed_9_tower_conv_conv2d_relu" -} -layer { - name: "mixed_9_tower_mixed_conv_conv2d" - type: "Convolution" - bottom: "mixed_9_tower_conv_conv2d_relu" - top: "mixed_9_tower_mixed_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 1 - kernel_h: 1 - kernel_w: 3 - } -} -layer { - name: "mixed_9_tower_mixed_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_mixed_conv_conv2d" - top: "mixed_9_tower_mixed_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_9_tower_mixed_conv_relu" - type: "ReLU" - bottom: "mixed_9_tower_mixed_conv_conv2d_bn" - top: "mixed_9_tower_mixed_conv_conv2d_relu" -} -layer { - name: "mixed_9_tower_mixed_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_9_tower_conv_conv2d_relu" - top: "mixed_9_tower_mixed_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 1 - pad_w: 0 - kernel_h: 3 - kernel_w: 1 - } -} -layer { - name: "mixed_9_tower_mixed_conv_1_batchnorm" - type: "BatchNorm" - bottom: 
"mixed_9_tower_mixed_conv_1_conv2d" - top: "mixed_9_tower_mixed_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_9_tower_mixed_conv_1_relu" - type: "ReLU" - bottom: "mixed_9_tower_mixed_conv_1_conv2d_bn" - top: "mixed_9_tower_mixed_conv_1_conv2d_relu" -} -layer { - name: "mixed_9_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_8_chconcat" - top: "mixed_9_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 448 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_9_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_1_conv_conv2d" - top: "mixed_9_tower_1_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_9_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_9_tower_1_conv_conv2d_bn" - top: "mixed_9_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_9_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_9_tower_1_conv_conv2d_relu" - top: "mixed_9_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_9_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_1_conv_1_conv2d" - top: "mixed_9_tower_1_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_9_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_9_tower_1_conv_1_conv2d_bn" - top: "mixed_9_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_9_tower_1_mixed_conv_conv2d" - type: "Convolution" - bottom: "mixed_9_tower_1_conv_1_conv2d_relu" - top: "mixed_9_tower_1_mixed_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - 
weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 1 - kernel_h: 1 - kernel_w: 3 - } -} -layer { - name: "mixed_9_tower_1_mixed_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_1_mixed_conv_conv2d" - top: "mixed_9_tower_1_mixed_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_9_tower_1_mixed_conv_relu" - type: "ReLU" - bottom: "mixed_9_tower_1_mixed_conv_conv2d_bn" - top: "mixed_9_tower_1_mixed_conv_conv2d_relu" -} -layer { - name: "mixed_9_tower_1_mixed_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_9_tower_1_conv_1_conv2d_relu" - top: "mixed_9_tower_1_mixed_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 1 - pad_w: 0 - kernel_h: 3 - kernel_w: 1 - } -} -layer { - name: "mixed_9_tower_1_mixed_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_1_mixed_conv_1_conv2d" - top: "mixed_9_tower_1_mixed_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_9_tower_1_mixed_conv_1_relu" - type: "ReLU" - bottom: "mixed_9_tower_1_mixed_conv_1_conv2d_bn" - top: "mixed_9_tower_1_mixed_conv_1_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_9_pool" - type: "Pooling" - bottom: "ch_concat_mixed_8_chconcat" - top: "AVE_pool_mixed_9_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_9_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_9_pool" - top: "mixed_9_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_9_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_2_conv_conv2d" - top: "mixed_9_tower_2_conv_conv2d_bn" - batch_norm_param { - 
engine: MKL2017 - } -} -layer { - name: "mixed_9_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_9_tower_2_conv_conv2d_bn" - top: "mixed_9_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_9_chconcat" - type: "Concat" - bottom: "mixed_9_conv_conv2d_relu" - bottom: "mixed_9_tower_mixed_conv_conv2d_relu" - bottom: "mixed_9_tower_mixed_conv_1_conv2d_relu" - bottom: "mixed_9_tower_1_mixed_conv_conv2d_relu" - bottom: "mixed_9_tower_1_mixed_conv_1_conv2d_relu" - bottom: "mixed_9_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_9_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_10_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_9_chconcat" - top: "mixed_10_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 320 - bias_term: false - pad: 0 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - kernel_h: 1 - kernel_w: 1 - } -} -layer { - name: "mixed_10_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_conv_conv2d" - top: "mixed_10_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_10_conv_relu" - type: "ReLU" - bottom: "mixed_10_conv_conv2d_bn" - top: "mixed_10_conv_conv2d_relu" -} -layer { - name: "mixed_10_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_9_chconcat" - top: "mixed_10_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_10_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_conv_conv2d" - top: "mixed_10_tower_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_10_tower_conv_relu" - type: "ReLU" - bottom: "mixed_10_tower_conv_conv2d_bn" - top: "mixed_10_tower_conv_conv2d_relu" -} -layer { - name: "mixed_10_tower_mixed_conv_conv2d" - type: "Convolution" - 
bottom: "mixed_10_tower_conv_conv2d_relu" - top: "mixed_10_tower_mixed_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 1 - kernel_h: 1 - kernel_w: 3 - } -} -layer { - name: "mixed_10_tower_mixed_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_mixed_conv_conv2d" - top: "mixed_10_tower_mixed_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_10_tower_mixed_conv_relu" - type: "ReLU" - bottom: "mixed_10_tower_mixed_conv_conv2d_bn" - top: "mixed_10_tower_mixed_conv_conv2d_relu" -} -layer { - name: "mixed_10_tower_mixed_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_10_tower_conv_conv2d_relu" - top: "mixed_10_tower_mixed_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 1 - pad_w: 0 - kernel_h: 3 - kernel_w: 1 - } -} -layer { - name: "mixed_10_tower_mixed_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_mixed_conv_1_conv2d" - top: "mixed_10_tower_mixed_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_10_tower_mixed_conv_1_relu" - type: "ReLU" - bottom: "mixed_10_tower_mixed_conv_1_conv2d_bn" - top: "mixed_10_tower_mixed_conv_1_conv2d_relu" -} -layer { - name: "mixed_10_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_9_chconcat" - top: "mixed_10_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 448 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_10_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_1_conv_conv2d" - top: "mixed_10_tower_1_conv_conv2d_bn" - batch_norm_param { - engine: 
MKL2017 - } -} -layer { - name: "mixed_10_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_10_tower_1_conv_conv2d_bn" - top: "mixed_10_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_10_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_10_tower_1_conv_conv2d_relu" - top: "mixed_10_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_10_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_1_conv_1_conv2d" - top: "mixed_10_tower_1_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_10_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_10_tower_1_conv_1_conv2d_bn" - top: "mixed_10_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_10_tower_1_mixed_conv_conv2d" - type: "Convolution" - bottom: "mixed_10_tower_1_conv_1_conv2d_relu" - top: "mixed_10_tower_1_mixed_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 1 - kernel_h: 1 - kernel_w: 3 - } -} -layer { - name: "mixed_10_tower_1_mixed_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_1_mixed_conv_conv2d" - top: "mixed_10_tower_1_mixed_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_10_tower_1_mixed_conv_relu" - type: "ReLU" - bottom: "mixed_10_tower_1_mixed_conv_conv2d_bn" - top: "mixed_10_tower_1_mixed_conv_conv2d_relu" -} -layer { - name: "mixed_10_tower_1_mixed_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_10_tower_1_conv_1_conv2d_relu" - top: "mixed_10_tower_1_mixed_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" 
- std: 0.01 - } - pad_h: 1 - pad_w: 0 - kernel_h: 3 - kernel_w: 1 - } -} -layer { - name: "mixed_10_tower_1_mixed_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_1_mixed_conv_1_conv2d" - top: "mixed_10_tower_1_mixed_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_10_tower_1_mixed_conv_1_relu" - type: "ReLU" - bottom: "mixed_10_tower_1_mixed_conv_1_conv2d_bn" - top: "mixed_10_tower_1_mixed_conv_1_conv2d_relu" -} -layer { - name: "MAX_pool_mixed_10_pool" - type: "Pooling" - bottom: "ch_concat_mixed_9_chconcat" - top: "MAX_pool_mixed_10_pool" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_10_tower_2_conv_conv2d" - type: "Convolution" - bottom: "MAX_pool_mixed_10_pool" - top: "mixed_10_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_10_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_2_conv_conv2d" - top: "mixed_10_tower_2_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_10_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_10_tower_2_conv_conv2d_bn" - top: "mixed_10_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_10_chconcat" - type: "Concat" - bottom: "mixed_10_conv_conv2d_relu" - bottom: "mixed_10_tower_mixed_conv_conv2d_relu" - bottom: "mixed_10_tower_mixed_conv_1_conv2d_relu" - bottom: "mixed_10_tower_1_mixed_conv_conv2d_relu" - bottom: "mixed_10_tower_1_mixed_conv_1_conv2d_relu" - bottom: "mixed_10_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_10_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "global_pool" - type: "Pooling" - bottom: "ch_concat_mixed_10_chconcat" - top: "global_pool" - pooling_param { - pool: AVE - kernel_size: 8 - stride: 1 - pad: 0 - } -} -layer { - 
name: "drop" - type: "Dropout" - bottom: "global_pool" - top: "global_pool" - dropout_param { - dropout_ratio: 0.8 - } -} -layer { - name: "flatten" - type: "Flatten" - bottom: "global_pool" - top: "flatten" -} -layer { - name: "fc1" - type: "InnerProduct" - bottom: "flatten" - top: "fc1" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - inner_product_param { - num_output: 1000 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "loss" - type: "SoftmaxWithLoss" - bottom: "fc1" - bottom: "label" - top: "loss" -} -layer { - name: "loss3/top-1" - type: "Accuracy" - bottom: "fc1" - bottom: "label" - top: "loss3/top-1" - include { - phase: TEST - } -} -layer { - name: "loss3/top-5" - type: "Accuracy" - bottom: "fc1" - bottom: "label" - top: "loss3/top-5" - include { - phase: TEST - } - accuracy_param { - top_k: 5 - } -} \ No newline at end of file diff --git a/models/intel_optimized_models/googlenet_v3/mkl/nodata_solver.prototxt b/models/intel_optimized_models/googlenet_v3/mkl/nodata_solver.prototxt deleted file mode 100644 index ab76852c9..000000000 --- a/models/intel_optimized_models/googlenet_v3/mkl/nodata_solver.prototxt +++ /dev/null @@ -1,16 +0,0 @@ -net: "models/intel_optimized_models/googlenet_v3/mkl/nodata_train_val.prototxt" -test_iter: 1000 -test_interval: 10000 -test_initialization: false -display: 40 -average_loss: 40 -base_lr: 0.045 -lr_policy: "step" -stepsize: 6400 -gamma: 0.96 -max_iter: 1200000 -momentum: 0.9 -weight_decay: 0.0002 -snapshot: 50000 -snapshot_prefix: "models/intel_optimized_models/googlenet_v3/mkl/googlenet_v3_mkl_nodata" -solver_mode: CPU diff --git a/models/intel_optimized_models/googlenet_v3/mkl/nodata_train_val.prototxt b/models/intel_optimized_models/googlenet_v3/mkl/nodata_train_val.prototxt deleted file mode 100644 index 57ba235ac..000000000 --- a/models/intel_optimized_models/googlenet_v3/mkl/nodata_train_val.prototxt +++ 
/dev/null @@ -1,3860 +0,0 @@ -name: "InceptionV3" - -layer { - name: "data" - type: "DummyData" - top: "data" - include { - phase: TRAIN - } - dummy_data_param { - shape: { dim: 22 dim: 3 dim: 299 dim: 299 } - data_filler { - type: "constant" - value: 0.01 - } - } -} -layer { - name: "data" - type: "DummyData" - top: "label" - include { - phase: TRAIN - } - dummy_data_param { - shape: { dim: 22 } - data_filler { - type: "constant" - } - } -} -#-------------------- - - -layer { - name: "conv_conv2d" - type: "Convolution" - bottom: "data" - top: "conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 32 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 2 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "conv_batchnorm" - type: "BatchNorm" - bottom: "conv_conv2d" - top: "conv_conv2d_bn" - - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "conv_relu" - type: "ReLU" - bottom: "conv_conv2d_bn" - top: "conv_conv2d_relu" -} -layer { - name: "conv_1_1_conv2d" - type: "Convolution" - bottom: "conv_conv2d_relu" - top: "conv_1_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 32 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "conv_1_1_batchnorm" - type: "BatchNorm" - bottom: "conv_1_1_conv2d" - top: "conv_1_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "conv_1_1_relu" - type: "ReLU" - bottom: "conv_1_1_conv2d_bn" - top: "conv_1_1_conv2d_relu" -} -layer { - name: "conv_2_2_conv2d" - type: "Convolution" - bottom: "conv_1_1_conv2d_relu" - top: "conv_2_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "conv_2_2_batchnorm" - type: "BatchNorm" - bottom: 
"conv_2_2_conv2d" - top: "conv_2_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "conv_2_2_relu" - type: "ReLU" - bottom: "conv_2_2_conv2d_bn" - top: "conv_2_2_conv2d_relu" -} -layer { - name: "pool" - type: "Pooling" - bottom: "conv_2_2_conv2d_relu" - top: "pool" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - pad: 0 - } -} -layer { - name: "conv_3_3_conv2d" - type: "Convolution" - bottom: "pool" - top: "conv_3_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 80 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "conv_3_3_batchnorm" - type: "BatchNorm" - bottom: "conv_3_3_conv2d" - top: "conv_3_3_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "conv_3_3_relu" - type: "ReLU" - bottom: "conv_3_3_conv2d_bn" - top: "conv_3_3_conv2d_relu" -} -layer { - name: "conv_4_4_conv2d" - type: "Convolution" - bottom: "conv_3_3_conv2d_relu" - top: "conv_4_4_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "conv_4_4_batchnorm" - type: "BatchNorm" - bottom: "conv_4_4_conv2d" - top: "conv_4_4_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "conv_4_4_relu" - type: "ReLU" - bottom: "conv_4_4_conv2d_bn" - top: "conv_4_4_conv2d_relu" -} -layer { - name: "pool1" - type: "Pooling" - bottom: "conv_4_4_conv2d_relu" - top: "pool1" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - pad: 0 - } -} -layer { - name: "mixed_conv_conv2d" - type: "Convolution" - bottom: "pool1" - top: "mixed_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } 
- } -} -layer { - name: "mixed_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_conv_conv2d" - top: "mixed_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_conv_relu" - type: "ReLU" - bottom: "mixed_conv_conv2d_bn" - top: "mixed_conv_conv2d_relu" -} -layer { - name: "mixed_tower_conv_conv2d" - type: "Convolution" - bottom: "pool1" - top: "mixed_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 48 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_tower_conv_conv2d" - top: "mixed_tower_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_tower_conv_relu" - type: "ReLU" - bottom: "mixed_tower_conv_conv2d_bn" - top: "mixed_tower_conv_conv2d_relu" -} -layer { - name: "mixed_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_tower_conv_conv2d_relu" - top: "mixed_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_tower_conv_1_conv2d" - top: "mixed_tower_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_tower_conv_1_conv2d_bn" - top: "mixed_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_tower_1_conv_conv2d" - type: "Convolution" - bottom: "pool1" - top: "mixed_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_tower_1_conv_batchnorm" - type: "BatchNorm" 
- bottom: "mixed_tower_1_conv_conv2d" - top: "mixed_tower_1_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_tower_1_conv_conv2d_bn" - top: "mixed_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_tower_1_conv_conv2d_relu" - top: "mixed_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_tower_1_conv_1_conv2d" - top: "mixed_tower_1_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_tower_1_conv_1_conv2d_bn" - top: "mixed_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_tower_1_conv_1_conv2d_relu" - top: "mixed_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_tower_1_conv_2_conv2d" - top: "mixed_tower_1_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_tower_1_conv_2_conv2d_bn" - top: "mixed_tower_1_conv_2_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_pool" - type: "Pooling" - bottom: "pool1" - top: "AVE_pool_mixed_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_pool" - top: "mixed_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - 
decay_mult: 1.0 - } - convolution_param { - num_output: 32 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_tower_2_conv_conv2d" - top: "mixed_tower_2_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_tower_2_conv_conv2d_bn" - top: "mixed_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_chconcat" - type: "Concat" - bottom: "mixed_conv_conv2d_relu" - bottom: "mixed_tower_conv_1_conv2d_relu" - bottom: "mixed_tower_1_conv_2_conv2d_relu" - bottom: "mixed_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_chconcat" - top: "mixed_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_conv_conv2d" - top: "mixed_1_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_1_conv_relu" - type: "ReLU" - bottom: "mixed_1_conv_conv2d_bn" - top: "mixed_1_conv_conv2d_relu" -} -layer { - name: "mixed_1_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_chconcat" - top: "mixed_1_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 48 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_conv_conv2d" - top: "mixed_1_tower_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: 
"mixed_1_tower_conv_relu" - type: "ReLU" - bottom: "mixed_1_tower_conv_conv2d_bn" - top: "mixed_1_tower_conv_conv2d_relu" -} -layer { - name: "mixed_1_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_1_tower_conv_conv2d_relu" - top: "mixed_1_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_conv_1_conv2d" - top: "mixed_1_tower_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_1_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_1_tower_conv_1_conv2d_bn" - top: "mixed_1_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_1_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_chconcat" - top: "mixed_1_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_1_conv_conv2d" - top: "mixed_1_tower_1_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_1_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_1_tower_1_conv_conv2d_bn" - top: "mixed_1_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_1_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_1_tower_1_conv_conv2d_relu" - top: "mixed_1_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_1_conv_1_conv2d" - top: 
"mixed_1_tower_1_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_1_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_1_tower_1_conv_1_conv2d_bn" - top: "mixed_1_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_1_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_1_tower_1_conv_1_conv2d_relu" - top: "mixed_1_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_1_conv_2_conv2d" - top: "mixed_1_tower_1_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_1_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_1_tower_1_conv_2_conv2d_bn" - top: "mixed_1_tower_1_conv_2_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_1_pool" - type: "Pooling" - bottom: "ch_concat_mixed_chconcat" - top: "AVE_pool_mixed_1_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_1_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_1_pool" - top: "mixed_1_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_2_conv_conv2d" - top: "mixed_1_tower_2_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_1_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_1_tower_2_conv_conv2d_bn" - top: "mixed_1_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_1_chconcat" - type: "Concat" - bottom: "mixed_1_conv_conv2d_relu" - bottom: "mixed_1_tower_conv_1_conv2d_relu" - bottom: 
"mixed_1_tower_1_conv_2_conv2d_relu" - bottom: "mixed_1_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_1_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_2_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_1_chconcat" - top: "mixed_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_conv_conv2d" - top: "mixed_2_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_2_conv_relu" - type: "ReLU" - bottom: "mixed_2_conv_conv2d_bn" - top: "mixed_2_conv_conv2d_relu" -} -layer { - name: "mixed_2_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_1_chconcat" - top: "mixed_2_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 48 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_tower_conv_conv2d" - top: "mixed_2_tower_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_2_tower_conv_relu" - type: "ReLU" - bottom: "mixed_2_tower_conv_conv2d_bn" - top: "mixed_2_tower_conv_conv2d_relu" -} -layer { - name: "mixed_2_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_2_tower_conv_conv2d_relu" - top: "mixed_2_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_tower_conv_1_conv2d" - top: "mixed_2_tower_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - 
} -} -layer { - name: "mixed_2_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_2_tower_conv_1_conv2d_bn" - top: "mixed_2_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_2_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_1_chconcat" - top: "mixed_2_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_tower_1_conv_conv2d" - top: "mixed_2_tower_1_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_2_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_2_tower_1_conv_conv2d_bn" - top: "mixed_2_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_2_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_2_tower_1_conv_conv2d_relu" - top: "mixed_2_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_tower_1_conv_1_conv2d" - top: "mixed_2_tower_1_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_2_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_2_tower_1_conv_1_conv2d_bn" - top: "mixed_2_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_2_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_2_tower_1_conv_1_conv2d_relu" - top: "mixed_2_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_tower_1_conv_2_batchnorm" - type: "BatchNorm" - 
bottom: "mixed_2_tower_1_conv_2_conv2d" - top: "mixed_2_tower_1_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_2_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_2_tower_1_conv_2_conv2d_bn" - top: "mixed_2_tower_1_conv_2_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_2_pool" - type: "Pooling" - bottom: "ch_concat_mixed_1_chconcat" - top: "AVE_pool_mixed_2_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_2_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_2_pool" - top: "mixed_2_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_tower_2_conv_conv2d" - top: "mixed_2_tower_2_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_2_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_2_tower_2_conv_conv2d_bn" - top: "mixed_2_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_2_chconcat" - type: "Concat" - bottom: "mixed_2_conv_conv2d_relu" - bottom: "mixed_2_tower_conv_1_conv2d_relu" - bottom: "mixed_2_tower_1_conv_2_conv2d_relu" - bottom: "mixed_2_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_2_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_3_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_2_chconcat" - top: "mixed_3_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 2 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_3_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_3_conv_conv2d" - top: "mixed_3_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - 
name: "mixed_3_conv_relu" - type: "ReLU" - bottom: "mixed_3_conv_conv2d_bn" - top: "mixed_3_conv_conv2d_relu" -} -layer { - name: "mixed_3_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_2_chconcat" - top: "mixed_3_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_3_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_3_tower_conv_conv2d" - top: "mixed_3_tower_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_3_tower_conv_relu" - type: "ReLU" - bottom: "mixed_3_tower_conv_conv2d_bn" - top: "mixed_3_tower_conv_conv2d_relu" -} -layer { - name: "mixed_3_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_3_tower_conv_conv2d_relu" - top: "mixed_3_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_3_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_3_tower_conv_1_conv2d" - top: "mixed_3_tower_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_3_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_3_tower_conv_1_conv2d_bn" - top: "mixed_3_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_3_tower_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_3_tower_conv_1_conv2d_relu" - top: "mixed_3_tower_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 2 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_3_tower_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_3_tower_conv_2_conv2d" - top: "mixed_3_tower_conv_2_conv2d_bn" - 
batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_3_tower_conv_2_relu" - type: "ReLU" - bottom: "mixed_3_tower_conv_2_conv2d_bn" - top: "mixed_3_tower_conv_2_conv2d_relu" -} -layer { - name: "max_pool_mixed_3_pool" - type: "Pooling" - bottom: "ch_concat_mixed_2_chconcat" - top: "max_pool_mixed_3_pool" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - pad: 0 - } -} -layer { - name: "ch_concat_mixed_3_chconcat" - type: "Concat" - bottom: "max_pool_mixed_3_pool" - bottom: "mixed_3_conv_conv2d_relu" - bottom: "mixed_3_tower_conv_2_conv2d_relu" - top: "ch_concat_mixed_3_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_4_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_3_chconcat" - top: "mixed_4_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_4_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_conv_conv2d" - top: "mixed_4_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_4_conv_relu" - type: "ReLU" - bottom: "mixed_4_conv_conv2d_bn" - top: "mixed_4_conv_conv2d_relu" -} -layer { - name: "mixed_4_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_3_chconcat" - top: "mixed_4_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_4_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_conv_conv2d" - top: "mixed_4_tower_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_4_tower_conv_relu" - type: "ReLU" - bottom: "mixed_4_tower_conv_conv2d_bn" - top: "mixed_4_tower_conv_conv2d_relu" -} -layer { - name: "mixed_4_tower_conv_1_conv2d" - 
type: "Convolution" - bottom: "mixed_4_tower_conv_conv2d_relu" - top: "mixed_4_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_4_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_conv_1_conv2d" - top: "mixed_4_tower_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_4_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_4_tower_conv_1_conv2d_bn" - top: "mixed_4_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_4_tower_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_4_tower_conv_1_conv2d_relu" - top: "mixed_4_tower_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_4_tower_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_conv_2_conv2d" - top: "mixed_4_tower_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_4_tower_conv_2_relu" - type: "ReLU" - bottom: "mixed_4_tower_conv_2_conv2d_bn" - top: "mixed_4_tower_conv_2_conv2d_relu" -} -layer { - name: "mixed_4_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_3_chconcat" - top: "mixed_4_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_4_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_1_conv_conv2d" - top: "mixed_4_tower_1_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_4_tower_1_conv_relu" - type: "ReLU" 
- bottom: "mixed_4_tower_1_conv_conv2d_bn" - top: "mixed_4_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_4_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_4_tower_1_conv_conv2d_relu" - top: "mixed_4_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_4_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_1_conv_1_conv2d" - top: "mixed_4_tower_1_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_4_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_4_tower_1_conv_1_conv2d_bn" - top: "mixed_4_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_4_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_4_tower_1_conv_1_conv2d_relu" - top: "mixed_4_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_4_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_1_conv_2_conv2d" - top: "mixed_4_tower_1_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_4_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_4_tower_1_conv_2_conv2d_bn" - top: "mixed_4_tower_1_conv_2_conv2d_relu" -} -layer { - name: "mixed_4_tower_1_conv_3_conv2d" - type: "Convolution" - bottom: "mixed_4_tower_1_conv_2_conv2d_relu" - top: "mixed_4_tower_1_conv_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: 
"mixed_4_tower_1_conv_3_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_1_conv_3_conv2d" - top: "mixed_4_tower_1_conv_3_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_4_tower_1_conv_3_relu" - type: "ReLU" - bottom: "mixed_4_tower_1_conv_3_conv2d_bn" - top: "mixed_4_tower_1_conv_3_conv2d_relu" -} -layer { - name: "mixed_4_tower_1_conv_4_conv2d" - type: "Convolution" - bottom: "mixed_4_tower_1_conv_3_conv2d_relu" - top: "mixed_4_tower_1_conv_4_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_4_tower_1_conv_4_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_1_conv_4_conv2d" - top: "mixed_4_tower_1_conv_4_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_4_tower_1_conv_4_relu" - type: "ReLU" - bottom: "mixed_4_tower_1_conv_4_conv2d_bn" - top: "mixed_4_tower_1_conv_4_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_4_pool" - type: "Pooling" - bottom: "ch_concat_mixed_3_chconcat" - top: "AVE_pool_mixed_4_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_4_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_4_pool" - top: "mixed_4_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_4_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_2_conv_conv2d" - top: "mixed_4_tower_2_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_4_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_4_tower_2_conv_conv2d_bn" - top: "mixed_4_tower_2_conv_conv2d_relu" -} -layer { - name: 
"ch_concat_mixed_4_chconcat" - type: "Concat" - bottom: "mixed_4_conv_conv2d_relu" - bottom: "mixed_4_tower_conv_2_conv2d_relu" - bottom: "mixed_4_tower_1_conv_4_conv2d_relu" - bottom: "mixed_4_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_4_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_5_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_4_chconcat" - top: "mixed_5_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_5_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_conv_conv2d" - top: "mixed_5_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_5_conv_relu" - type: "ReLU" - bottom: "mixed_5_conv_conv2d_bn" - top: "mixed_5_conv_conv2d_relu" -} -layer { - name: "mixed_5_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_4_chconcat" - top: "mixed_5_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_5_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_conv_conv2d" - top: "mixed_5_tower_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_5_tower_conv_relu" - type: "ReLU" - bottom: "mixed_5_tower_conv_conv2d_bn" - top: "mixed_5_tower_conv_conv2d_relu" -} -layer { - name: "mixed_5_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_5_tower_conv_conv2d_relu" - top: "mixed_5_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: 
"mixed_5_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_conv_1_conv2d" - top: "mixed_5_tower_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_5_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_5_tower_conv_1_conv2d_bn" - top: "mixed_5_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_5_tower_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_5_tower_conv_1_conv2d_relu" - top: "mixed_5_tower_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_5_tower_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_conv_2_conv2d" - top: "mixed_5_tower_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_5_tower_conv_2_relu" - type: "ReLU" - bottom: "mixed_5_tower_conv_2_conv2d_bn" - top: "mixed_5_tower_conv_2_conv2d_relu" -} -layer { - name: "mixed_5_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_4_chconcat" - top: "mixed_5_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_5_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_1_conv_conv2d" - top: "mixed_5_tower_1_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_5_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_5_tower_1_conv_conv2d_bn" - top: "mixed_5_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_5_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_5_tower_1_conv_conv2d_relu" - top: "mixed_5_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: 
false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_5_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_1_conv_1_conv2d" - top: "mixed_5_tower_1_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_5_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_5_tower_1_conv_1_conv2d_bn" - top: "mixed_5_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_5_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_5_tower_1_conv_1_conv2d_relu" - top: "mixed_5_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_5_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_1_conv_2_conv2d" - top: "mixed_5_tower_1_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_5_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_5_tower_1_conv_2_conv2d_bn" - top: "mixed_5_tower_1_conv_2_conv2d_relu" -} -layer { - name: "mixed_5_tower_1_conv_3_conv2d" - type: "Convolution" - bottom: "mixed_5_tower_1_conv_2_conv2d_relu" - top: "mixed_5_tower_1_conv_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_5_tower_1_conv_3_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_1_conv_3_conv2d" - top: "mixed_5_tower_1_conv_3_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_5_tower_1_conv_3_relu" - type: "ReLU" - bottom: "mixed_5_tower_1_conv_3_conv2d_bn" - top: "mixed_5_tower_1_conv_3_conv2d_relu" -} -layer { - name: 
"mixed_5_tower_1_conv_4_conv2d" - type: "Convolution" - bottom: "mixed_5_tower_1_conv_3_conv2d_relu" - top: "mixed_5_tower_1_conv_4_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_5_tower_1_conv_4_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_1_conv_4_conv2d" - top: "mixed_5_tower_1_conv_4_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_5_tower_1_conv_4_relu" - type: "ReLU" - bottom: "mixed_5_tower_1_conv_4_conv2d_bn" - top: "mixed_5_tower_1_conv_4_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_5_pool" - type: "Pooling" - bottom: "ch_concat_mixed_4_chconcat" - top: "AVE_pool_mixed_5_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_5_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_5_pool" - top: "mixed_5_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_5_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_2_conv_conv2d" - top: "mixed_5_tower_2_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_5_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_5_tower_2_conv_conv2d_bn" - top: "mixed_5_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_5_chconcat" - type: "Concat" - bottom: "mixed_5_conv_conv2d_relu" - bottom: "mixed_5_tower_conv_2_conv2d_relu" - bottom: "mixed_5_tower_1_conv_4_conv2d_relu" - bottom: "mixed_5_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_5_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_6_conv_conv2d" - type: "Convolution" - bottom: 
"ch_concat_mixed_5_chconcat" - top: "mixed_6_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_6_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_conv_conv2d" - top: "mixed_6_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_6_conv_relu" - type: "ReLU" - bottom: "mixed_6_conv_conv2d_bn" - top: "mixed_6_conv_conv2d_relu" -} -layer { - name: "mixed_6_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_5_chconcat" - top: "mixed_6_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_6_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_conv_conv2d" - top: "mixed_6_tower_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_6_tower_conv_relu" - type: "ReLU" - bottom: "mixed_6_tower_conv_conv2d_bn" - top: "mixed_6_tower_conv_conv2d_relu" -} -layer { - name: "mixed_6_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_conv_conv2d_relu" - top: "mixed_6_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_6_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_conv_1_conv2d" - top: "mixed_6_tower_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_6_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_6_tower_conv_1_conv2d_bn" - top: "mixed_6_tower_conv_1_conv2d_relu" -} -layer { - name: 
"mixed_6_tower_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_conv_1_conv2d_relu" - top: "mixed_6_tower_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_6_tower_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_conv_2_conv2d" - top: "mixed_6_tower_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_6_tower_conv_2_relu" - type: "ReLU" - bottom: "mixed_6_tower_conv_2_conv2d_bn" - top: "mixed_6_tower_conv_2_conv2d_relu" -} -layer { - name: "mixed_6_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_5_chconcat" - top: "mixed_6_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_6_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_1_conv_conv2d" - top: "mixed_6_tower_1_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_6_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_6_tower_1_conv_conv2d_bn" - top: "mixed_6_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_6_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_1_conv_conv2d_relu" - top: "mixed_6_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_6_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_1_conv_1_conv2d" - top: "mixed_6_tower_1_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - 
name: "mixed_6_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_6_tower_1_conv_1_conv2d_bn" - top: "mixed_6_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_6_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_1_conv_1_conv2d_relu" - top: "mixed_6_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_6_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_1_conv_2_conv2d" - top: "mixed_6_tower_1_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_6_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_6_tower_1_conv_2_conv2d_bn" - top: "mixed_6_tower_1_conv_2_conv2d_relu" -} -layer { - name: "mixed_6_tower_1_conv_3_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_1_conv_2_conv2d_relu" - top: "mixed_6_tower_1_conv_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_6_tower_1_conv_3_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_1_conv_3_conv2d" - top: "mixed_6_tower_1_conv_3_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_6_tower_1_conv_3_relu" - type: "ReLU" - bottom: "mixed_6_tower_1_conv_3_conv2d_bn" - top: "mixed_6_tower_1_conv_3_conv2d_relu" -} -layer { - name: "mixed_6_tower_1_conv_4_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_1_conv_3_conv2d_relu" - top: "mixed_6_tower_1_conv_4_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - 
kernel_w: 7 - } -} -layer { - name: "mixed_6_tower_1_conv_4_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_1_conv_4_conv2d" - top: "mixed_6_tower_1_conv_4_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_6_tower_1_conv_4_relu" - type: "ReLU" - bottom: "mixed_6_tower_1_conv_4_conv2d_bn" - top: "mixed_6_tower_1_conv_4_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_6_pool" - type: "Pooling" - bottom: "ch_concat_mixed_5_chconcat" - top: "AVE_pool_mixed_6_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_6_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_6_pool" - top: "mixed_6_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_6_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_2_conv_conv2d" - top: "mixed_6_tower_2_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_6_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_6_tower_2_conv_conv2d_bn" - top: "mixed_6_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_6_chconcat" - type: "Concat" - bottom: "mixed_6_conv_conv2d_relu" - bottom: "mixed_6_tower_conv_2_conv2d_relu" - bottom: "mixed_6_tower_1_conv_4_conv2d_relu" - bottom: "mixed_6_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_6_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_7_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_6_chconcat" - top: "mixed_7_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_7_conv_batchnorm" - type: "BatchNorm" - bottom: 
"mixed_7_conv_conv2d" - top: "mixed_7_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_7_conv_relu" - type: "ReLU" - bottom: "mixed_7_conv_conv2d_bn" - top: "mixed_7_conv_conv2d_relu" -} -layer { - name: "mixed_7_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_6_chconcat" - top: "mixed_7_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_7_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_conv_conv2d" - top: "mixed_7_tower_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_7_tower_conv_relu" - type: "ReLU" - bottom: "mixed_7_tower_conv_conv2d_bn" - top: "mixed_7_tower_conv_conv2d_relu" -} -layer { - name: "mixed_7_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_7_tower_conv_conv2d_relu" - top: "mixed_7_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_7_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_conv_1_conv2d" - top: "mixed_7_tower_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_7_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_7_tower_conv_1_conv2d_bn" - top: "mixed_7_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_7_tower_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_7_tower_conv_1_conv2d_relu" - top: "mixed_7_tower_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } 
-} -layer { - name: "mixed_7_tower_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_conv_2_conv2d" - top: "mixed_7_tower_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_7_tower_conv_2_relu" - type: "ReLU" - bottom: "mixed_7_tower_conv_2_conv2d_bn" - top: "mixed_7_tower_conv_2_conv2d_relu" -} -layer { - name: "mixed_7_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_6_chconcat" - top: "mixed_7_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_7_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_1_conv_conv2d" - top: "mixed_7_tower_1_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_7_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_7_tower_1_conv_conv2d_bn" - top: "mixed_7_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_7_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_7_tower_1_conv_conv2d_relu" - top: "mixed_7_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_7_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_1_conv_1_conv2d" - top: "mixed_7_tower_1_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_7_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_7_tower_1_conv_1_conv2d_bn" - top: "mixed_7_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_7_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_7_tower_1_conv_1_conv2d_relu" - top: "mixed_7_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - 
convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_7_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_1_conv_2_conv2d" - top: "mixed_7_tower_1_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_7_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_7_tower_1_conv_2_conv2d_bn" - top: "mixed_7_tower_1_conv_2_conv2d_relu" -} -layer { - name: "mixed_7_tower_1_conv_3_conv2d" - type: "Convolution" - bottom: "mixed_7_tower_1_conv_2_conv2d_relu" - top: "mixed_7_tower_1_conv_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_7_tower_1_conv_3_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_1_conv_3_conv2d" - top: "mixed_7_tower_1_conv_3_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_7_tower_1_conv_3_relu" - type: "ReLU" - bottom: "mixed_7_tower_1_conv_3_conv2d_bn" - top: "mixed_7_tower_1_conv_3_conv2d_relu" -} -layer { - name: "mixed_7_tower_1_conv_4_conv2d" - type: "Convolution" - bottom: "mixed_7_tower_1_conv_3_conv2d_relu" - top: "mixed_7_tower_1_conv_4_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_7_tower_1_conv_4_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_1_conv_4_conv2d" - top: "mixed_7_tower_1_conv_4_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_7_tower_1_conv_4_relu" - type: "ReLU" - bottom: "mixed_7_tower_1_conv_4_conv2d_bn" - top: 
"mixed_7_tower_1_conv_4_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_7_pool" - type: "Pooling" - bottom: "ch_concat_mixed_6_chconcat" - top: "AVE_pool_mixed_7_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_7_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_7_pool" - top: "mixed_7_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_7_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_2_conv_conv2d" - top: "mixed_7_tower_2_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_7_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_7_tower_2_conv_conv2d_bn" - top: "mixed_7_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_7_chconcat" - type: "Concat" - bottom: "mixed_7_conv_conv2d_relu" - bottom: "mixed_7_tower_conv_2_conv2d_relu" - bottom: "mixed_7_tower_1_conv_4_conv2d_relu" - bottom: "mixed_7_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_7_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_8_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_7_chconcat" - top: "mixed_8_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_8_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_conv_conv2d" - top: "mixed_8_tower_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_8_tower_conv_relu" - type: "ReLU" - bottom: "mixed_8_tower_conv_conv2d_bn" - top: "mixed_8_tower_conv_conv2d_relu" -} -layer { - name: "mixed_8_tower_conv_1_conv2d" - type: "Convolution" - bottom: 
"mixed_8_tower_conv_conv2d_relu" - top: "mixed_8_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 320 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 2 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_8_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_conv_1_conv2d" - top: "mixed_8_tower_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_8_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_8_tower_conv_1_conv2d_bn" - top: "mixed_8_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_8_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_7_chconcat" - top: "mixed_8_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_8_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_1_conv_conv2d" - top: "mixed_8_tower_1_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_8_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_8_tower_1_conv_conv2d_bn" - top: "mixed_8_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_8_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_8_tower_1_conv_conv2d_relu" - top: "mixed_8_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_8_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_1_conv_1_conv2d" - top: "mixed_8_tower_1_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_8_tower_1_conv_1_relu" - type: "ReLU" - bottom: 
"mixed_8_tower_1_conv_1_conv2d_bn" - top: "mixed_8_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_8_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_8_tower_1_conv_1_conv2d_relu" - top: "mixed_8_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_8_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_1_conv_2_conv2d" - top: "mixed_8_tower_1_conv_2_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_8_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_8_tower_1_conv_2_conv2d_bn" - top: "mixed_8_tower_1_conv_2_conv2d_relu" -} -layer { - name: "mixed_8_tower_1_conv_3_conv2d" - type: "Convolution" - bottom: "mixed_8_tower_1_conv_2_conv2d_relu" - top: "mixed_8_tower_1_conv_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 2 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_8_tower_1_conv_3_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_1_conv_3_conv2d" - top: "mixed_8_tower_1_conv_3_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_8_tower_1_conv_3_relu" - type: "ReLU" - bottom: "mixed_8_tower_1_conv_3_conv2d_bn" - top: "mixed_8_tower_1_conv_3_conv2d_relu" -} -layer { - name: "MAX_pool_mixed_8_pool" - type: "Pooling" - bottom: "ch_concat_mixed_7_chconcat" - top: "MAX_pool_mixed_8_pool" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - pad: 0 - } -} -layer { - name: "ch_concat_mixed_8_chconcat" - type: "Concat" - bottom: "mixed_8_tower_conv_1_conv2d_relu" - bottom: "mixed_8_tower_1_conv_3_conv2d_relu" - bottom: "MAX_pool_mixed_8_pool" - top: "ch_concat_mixed_8_chconcat" - concat_param { - axis: 1 - } 
-} -layer { - name: "mixed_9_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_8_chconcat" - top: "mixed_9_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 320 - bias_term: false - pad: 0 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - kernel_h: 1 - kernel_w: 1 - } -} -layer { - name: "mixed_9_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_conv_conv2d" - top: "mixed_9_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_9_conv_relu" - type: "ReLU" - bottom: "mixed_9_conv_conv2d_bn" - top: "mixed_9_conv_conv2d_relu" -} -layer { - name: "mixed_9_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_8_chconcat" - top: "mixed_9_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_9_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_conv_conv2d" - top: "mixed_9_tower_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_9_tower_conv_relu" - type: "ReLU" - bottom: "mixed_9_tower_conv_conv2d_bn" - top: "mixed_9_tower_conv_conv2d_relu" -} -layer { - name: "mixed_9_tower_mixed_conv_conv2d" - type: "Convolution" - bottom: "mixed_9_tower_conv_conv2d_relu" - top: "mixed_9_tower_mixed_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 1 - kernel_h: 1 - kernel_w: 3 - } -} -layer { - name: "mixed_9_tower_mixed_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_mixed_conv_conv2d" - top: "mixed_9_tower_mixed_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_9_tower_mixed_conv_relu" - type: "ReLU" - bottom: 
"mixed_9_tower_mixed_conv_conv2d_bn" - top: "mixed_9_tower_mixed_conv_conv2d_relu" -} -layer { - name: "mixed_9_tower_mixed_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_9_tower_conv_conv2d_relu" - top: "mixed_9_tower_mixed_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 1 - pad_w: 0 - kernel_h: 3 - kernel_w: 1 - } -} -layer { - name: "mixed_9_tower_mixed_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_mixed_conv_1_conv2d" - top: "mixed_9_tower_mixed_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_9_tower_mixed_conv_1_relu" - type: "ReLU" - bottom: "mixed_9_tower_mixed_conv_1_conv2d_bn" - top: "mixed_9_tower_mixed_conv_1_conv2d_relu" -} -layer { - name: "mixed_9_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_8_chconcat" - top: "mixed_9_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 448 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_9_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_1_conv_conv2d" - top: "mixed_9_tower_1_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_9_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_9_tower_1_conv_conv2d_bn" - top: "mixed_9_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_9_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_9_tower_1_conv_conv2d_relu" - top: "mixed_9_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_9_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: 
"mixed_9_tower_1_conv_1_conv2d" - top: "mixed_9_tower_1_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_9_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_9_tower_1_conv_1_conv2d_bn" - top: "mixed_9_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_9_tower_1_mixed_conv_conv2d" - type: "Convolution" - bottom: "mixed_9_tower_1_conv_1_conv2d_relu" - top: "mixed_9_tower_1_mixed_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 1 - kernel_h: 1 - kernel_w: 3 - } -} -layer { - name: "mixed_9_tower_1_mixed_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_1_mixed_conv_conv2d" - top: "mixed_9_tower_1_mixed_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_9_tower_1_mixed_conv_relu" - type: "ReLU" - bottom: "mixed_9_tower_1_mixed_conv_conv2d_bn" - top: "mixed_9_tower_1_mixed_conv_conv2d_relu" -} -layer { - name: "mixed_9_tower_1_mixed_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_9_tower_1_conv_1_conv2d_relu" - top: "mixed_9_tower_1_mixed_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 1 - pad_w: 0 - kernel_h: 3 - kernel_w: 1 - } -} -layer { - name: "mixed_9_tower_1_mixed_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_1_mixed_conv_1_conv2d" - top: "mixed_9_tower_1_mixed_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_9_tower_1_mixed_conv_1_relu" - type: "ReLU" - bottom: "mixed_9_tower_1_mixed_conv_1_conv2d_bn" - top: "mixed_9_tower_1_mixed_conv_1_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_9_pool" - type: "Pooling" - bottom: "ch_concat_mixed_8_chconcat" - top: "AVE_pool_mixed_9_pool" - pooling_param { - pool: AVE - 
kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_9_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_9_pool" - top: "mixed_9_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_9_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_2_conv_conv2d" - top: "mixed_9_tower_2_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_9_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_9_tower_2_conv_conv2d_bn" - top: "mixed_9_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_9_chconcat" - type: "Concat" - bottom: "mixed_9_conv_conv2d_relu" - bottom: "mixed_9_tower_mixed_conv_conv2d_relu" - bottom: "mixed_9_tower_mixed_conv_1_conv2d_relu" - bottom: "mixed_9_tower_1_mixed_conv_conv2d_relu" - bottom: "mixed_9_tower_1_mixed_conv_1_conv2d_relu" - bottom: "mixed_9_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_9_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_10_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_9_chconcat" - top: "mixed_10_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 320 - bias_term: false - pad: 0 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - kernel_h: 1 - kernel_w: 1 - } -} -layer { - name: "mixed_10_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_conv_conv2d" - top: "mixed_10_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_10_conv_relu" - type: "ReLU" - bottom: "mixed_10_conv_conv2d_bn" - top: "mixed_10_conv_conv2d_relu" -} -layer { - name: "mixed_10_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_9_chconcat" - top: "mixed_10_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - 
convolution_param { - num_output: 384 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_10_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_conv_conv2d" - top: "mixed_10_tower_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_10_tower_conv_relu" - type: "ReLU" - bottom: "mixed_10_tower_conv_conv2d_bn" - top: "mixed_10_tower_conv_conv2d_relu" -} -layer { - name: "mixed_10_tower_mixed_conv_conv2d" - type: "Convolution" - bottom: "mixed_10_tower_conv_conv2d_relu" - top: "mixed_10_tower_mixed_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 1 - kernel_h: 1 - kernel_w: 3 - } -} -layer { - name: "mixed_10_tower_mixed_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_mixed_conv_conv2d" - top: "mixed_10_tower_mixed_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_10_tower_mixed_conv_relu" - type: "ReLU" - bottom: "mixed_10_tower_mixed_conv_conv2d_bn" - top: "mixed_10_tower_mixed_conv_conv2d_relu" -} -layer { - name: "mixed_10_tower_mixed_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_10_tower_conv_conv2d_relu" - top: "mixed_10_tower_mixed_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 1 - pad_w: 0 - kernel_h: 3 - kernel_w: 1 - } -} -layer { - name: "mixed_10_tower_mixed_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_mixed_conv_1_conv2d" - top: "mixed_10_tower_mixed_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_10_tower_mixed_conv_1_relu" - type: "ReLU" - bottom: "mixed_10_tower_mixed_conv_1_conv2d_bn" - top: 
"mixed_10_tower_mixed_conv_1_conv2d_relu" -} -layer { - name: "mixed_10_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_9_chconcat" - top: "mixed_10_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 448 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_10_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_1_conv_conv2d" - top: "mixed_10_tower_1_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_10_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_10_tower_1_conv_conv2d_bn" - top: "mixed_10_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_10_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_10_tower_1_conv_conv2d_relu" - top: "mixed_10_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_10_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_1_conv_1_conv2d" - top: "mixed_10_tower_1_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_10_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_10_tower_1_conv_1_conv2d_bn" - top: "mixed_10_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_10_tower_1_mixed_conv_conv2d" - type: "Convolution" - bottom: "mixed_10_tower_1_conv_1_conv2d_relu" - top: "mixed_10_tower_1_mixed_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 1 - kernel_h: 1 - kernel_w: 3 - } -} -layer { - name: "mixed_10_tower_1_mixed_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_1_mixed_conv_conv2d" - top: 
"mixed_10_tower_1_mixed_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_10_tower_1_mixed_conv_relu" - type: "ReLU" - bottom: "mixed_10_tower_1_mixed_conv_conv2d_bn" - top: "mixed_10_tower_1_mixed_conv_conv2d_relu" -} -layer { - name: "mixed_10_tower_1_mixed_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_10_tower_1_conv_1_conv2d_relu" - top: "mixed_10_tower_1_mixed_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 1 - pad_w: 0 - kernel_h: 3 - kernel_w: 1 - } -} -layer { - name: "mixed_10_tower_1_mixed_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_1_mixed_conv_1_conv2d" - top: "mixed_10_tower_1_mixed_conv_1_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_10_tower_1_mixed_conv_1_relu" - type: "ReLU" - bottom: "mixed_10_tower_1_mixed_conv_1_conv2d_bn" - top: "mixed_10_tower_1_mixed_conv_1_conv2d_relu" -} -layer { - name: "MAX_pool_mixed_10_pool" - type: "Pooling" - bottom: "ch_concat_mixed_9_chconcat" - top: "MAX_pool_mixed_10_pool" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_10_tower_2_conv_conv2d" - type: "Convolution" - bottom: "MAX_pool_mixed_10_pool" - top: "mixed_10_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_10_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_2_conv_conv2d" - top: "mixed_10_tower_2_conv_conv2d_bn" - batch_norm_param { - engine: MKL2017 - } -} -layer { - name: "mixed_10_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_10_tower_2_conv_conv2d_bn" - top: "mixed_10_tower_2_conv_conv2d_relu" -} -layer { - name: 
"ch_concat_mixed_10_chconcat" - type: "Concat" - bottom: "mixed_10_conv_conv2d_relu" - bottom: "mixed_10_tower_mixed_conv_conv2d_relu" - bottom: "mixed_10_tower_mixed_conv_1_conv2d_relu" - bottom: "mixed_10_tower_1_mixed_conv_conv2d_relu" - bottom: "mixed_10_tower_1_mixed_conv_1_conv2d_relu" - bottom: "mixed_10_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_10_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "global_pool" - type: "Pooling" - bottom: "ch_concat_mixed_10_chconcat" - top: "global_pool" - pooling_param { - pool: AVE - kernel_size: 8 - stride: 1 - pad: 0 - } -} -layer { - name: "drop" - type: "Dropout" - bottom: "global_pool" - top: "global_pool" - dropout_param { - dropout_ratio: 0.8 - } -} -layer { - name: "flatten" - type: "Flatten" - bottom: "global_pool" - top: "flatten" -} -layer { - name: "fc1" - type: "InnerProduct" - bottom: "flatten" - top: "fc1" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - inner_product_param { - num_output: 1000 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "loss" - type: "SoftmaxWithLoss" - bottom: "fc1" - bottom: "label" - top: "loss" -} -layer { - name: "loss3/top-1" - type: "Accuracy" - bottom: "fc1" - bottom: "label" - top: "loss3/top-1" - include { - phase: TEST - } -} -layer { - name: "loss3/top-5" - type: "Accuracy" - bottom: "fc1" - bottom: "label" - top: "loss3/top-5" - include { - phase: TEST - } - accuracy_param { - top_k: 5 - } -} \ No newline at end of file diff --git a/models/intel_optimized_models/googlenet_v3/mkldnn/lmdb_solver.prototxt b/models/intel_optimized_models/googlenet_v3/mkldnn/lmdb_solver.prototxt deleted file mode 100644 index f113f0855..000000000 --- a/models/intel_optimized_models/googlenet_v3/mkldnn/lmdb_solver.prototxt +++ /dev/null @@ -1,16 +0,0 @@ -net: "models/intel_optimized_models/googlenet_v3/mkldnn/lmdb_train_val.prototxt" -test_iter: 1000 
-test_interval: 10000 -test_initialization: false -display: 40 -average_loss: 40 -base_lr: 0.045 -lr_policy: "step" -stepsize: 6400 -gamma: 0.96 -max_iter: 1200000 -momentum: 0.9 -weight_decay: 0.0002 -snapshot: 50000 -snapshot_prefix: "models/intel_optimized_models/googlenet_v3/mkldnn/googlenet_v3_mkldnn_lmdb" -solver_mode: CPU diff --git a/models/intel_optimized_models/googlenet_v3/mkldnn/lmdb_train_val.prototxt b/models/intel_optimized_models/googlenet_v3/mkldnn/lmdb_train_val.prototxt deleted file mode 100644 index 6d51b7f6b..000000000 --- a/models/intel_optimized_models/googlenet_v3/mkldnn/lmdb_train_val.prototxt +++ /dev/null @@ -1,3594 +0,0 @@ -name: "InceptionV3" - -layer { - top: "data" - top: "label" - name: "data" - type: "Data" - data_param { - source: "/data/LMDB_300px/ilsvrc12_train_lmdb" - batch_size: 22 - backend: LMDB -# shuffle: true - } - include { - phase: TRAIN - } - transform_param { - mirror: true - crop_size: 299 -# mean_file: "data/ilsvrc12/imagenet_mean.binaryproto" - mean_value: 104 - mean_value: 117 - mean_value: 123 - } -} -### Validation Set -layer { - top: "data" - top: "label" - name: "data" - type: "Data" - data_param { - source: "/data/LMDB_300px/ilsvrc12_val_lmdb" - batch_size: 50 - backend: LMDB - } - include { - phase: TEST - } - transform_param { - mirror: false - crop_size: 299 -# mean_file: "data/ilsvrc12/imagenet_mean.binaryproto" - mean_value: 104 - mean_value: 117 - mean_value: 123 - } -} -#-------------------- - - -layer { - name: "conv_conv2d" - type: "Convolution" - bottom: "data" - top: "conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 32 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 2 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "conv_batchnorm" - type: "BatchNorm" - bottom: "conv_conv2d" - top: "conv_conv2d_bn" -} -layer { - name: "conv_relu" - type: "ReLU" - bottom: "conv_conv2d_bn" - top: "conv_conv2d_relu" -} -layer { - name: 
"conv_1_1_conv2d" - type: "Convolution" - bottom: "conv_conv2d_relu" - top: "conv_1_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 32 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "conv_1_1_batchnorm" - type: "BatchNorm" - bottom: "conv_1_1_conv2d" - top: "conv_1_1_conv2d_bn" -} -layer { - name: "conv_1_1_relu" - type: "ReLU" - bottom: "conv_1_1_conv2d_bn" - top: "conv_1_1_conv2d_relu" -} -layer { - name: "conv_2_2_conv2d" - type: "Convolution" - bottom: "conv_1_1_conv2d_relu" - top: "conv_2_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "conv_2_2_batchnorm" - type: "BatchNorm" - bottom: "conv_2_2_conv2d" - top: "conv_2_2_conv2d_bn" -} -layer { - name: "conv_2_2_relu" - type: "ReLU" - bottom: "conv_2_2_conv2d_bn" - top: "conv_2_2_conv2d_relu" -} -layer { - name: "pool" - type: "Pooling" - bottom: "conv_2_2_conv2d_relu" - top: "pool" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - pad: 0 - } -} -layer { - name: "conv_3_3_conv2d" - type: "Convolution" - bottom: "pool" - top: "conv_3_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 80 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "conv_3_3_batchnorm" - type: "BatchNorm" - bottom: "conv_3_3_conv2d" - top: "conv_3_3_conv2d_bn" -} -layer { - name: "conv_3_3_relu" - type: "ReLU" - bottom: "conv_3_3_conv2d_bn" - top: "conv_3_3_conv2d_relu" -} -layer { - name: "conv_4_4_conv2d" - type: "Convolution" - bottom: "conv_3_3_conv2d_relu" - top: "conv_4_4_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - 
kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "conv_4_4_batchnorm" - type: "BatchNorm" - bottom: "conv_4_4_conv2d" - top: "conv_4_4_conv2d_bn" -} -layer { - name: "conv_4_4_relu" - type: "ReLU" - bottom: "conv_4_4_conv2d_bn" - top: "conv_4_4_conv2d_relu" -} -layer { - name: "pool1" - type: "Pooling" - bottom: "conv_4_4_conv2d_relu" - top: "pool1" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - pad: 0 - } -} -layer { - name: "mixed_conv_conv2d" - type: "Convolution" - bottom: "pool1" - top: "mixed_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_conv_conv2d" - top: "mixed_conv_conv2d_bn" -} -layer { - name: "mixed_conv_relu" - type: "ReLU" - bottom: "mixed_conv_conv2d_bn" - top: "mixed_conv_conv2d_relu" -} -layer { - name: "mixed_tower_conv_conv2d" - type: "Convolution" - bottom: "pool1" - top: "mixed_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 48 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_tower_conv_conv2d" - top: "mixed_tower_conv_conv2d_bn" -} -layer { - name: "mixed_tower_conv_relu" - type: "ReLU" - bottom: "mixed_tower_conv_conv2d_bn" - top: "mixed_tower_conv_conv2d_relu" -} -layer { - name: "mixed_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_tower_conv_conv2d_relu" - top: "mixed_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: 
"mixed_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_tower_conv_1_conv2d" - top: "mixed_tower_conv_1_conv2d_bn" -} -layer { - name: "mixed_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_tower_conv_1_conv2d_bn" - top: "mixed_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_tower_1_conv_conv2d" - type: "Convolution" - bottom: "pool1" - top: "mixed_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_tower_1_conv_conv2d" - top: "mixed_tower_1_conv_conv2d_bn" -} -layer { - name: "mixed_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_tower_1_conv_conv2d_bn" - top: "mixed_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_tower_1_conv_conv2d_relu" - top: "mixed_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_tower_1_conv_1_conv2d" - top: "mixed_tower_1_conv_1_conv2d_bn" -} -layer { - name: "mixed_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_tower_1_conv_1_conv2d_bn" - top: "mixed_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_tower_1_conv_1_conv2d_relu" - top: "mixed_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: 
"mixed_tower_1_conv_2_conv2d" - top: "mixed_tower_1_conv_2_conv2d_bn" -} -layer { - name: "mixed_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_tower_1_conv_2_conv2d_bn" - top: "mixed_tower_1_conv_2_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_pool" - type: "Pooling" - bottom: "pool1" - top: "AVE_pool_mixed_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_pool" - top: "mixed_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 32 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_tower_2_conv_conv2d" - top: "mixed_tower_2_conv_conv2d_bn" -} -layer { - name: "mixed_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_tower_2_conv_conv2d_bn" - top: "mixed_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_chconcat" - type: "Concat" - bottom: "mixed_conv_conv2d_relu" - bottom: "mixed_tower_conv_1_conv2d_relu" - bottom: "mixed_tower_1_conv_2_conv2d_relu" - bottom: "mixed_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_chconcat" - top: "mixed_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_conv_conv2d" - top: "mixed_1_conv_conv2d_bn" -} -layer { - name: "mixed_1_conv_relu" - type: "ReLU" - bottom: "mixed_1_conv_conv2d_bn" - top: "mixed_1_conv_conv2d_relu" -} -layer { - name: "mixed_1_tower_conv_conv2d" - type: "Convolution" - bottom: 
"ch_concat_mixed_chconcat" - top: "mixed_1_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 48 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_conv_conv2d" - top: "mixed_1_tower_conv_conv2d_bn" -} -layer { - name: "mixed_1_tower_conv_relu" - type: "ReLU" - bottom: "mixed_1_tower_conv_conv2d_bn" - top: "mixed_1_tower_conv_conv2d_relu" -} -layer { - name: "mixed_1_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_1_tower_conv_conv2d_relu" - top: "mixed_1_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_conv_1_conv2d" - top: "mixed_1_tower_conv_1_conv2d_bn" -} -layer { - name: "mixed_1_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_1_tower_conv_1_conv2d_bn" - top: "mixed_1_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_1_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_chconcat" - top: "mixed_1_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_1_conv_conv2d" - top: "mixed_1_tower_1_conv_conv2d_bn" -} -layer { - name: "mixed_1_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_1_tower_1_conv_conv2d_bn" - top: "mixed_1_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_1_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_1_tower_1_conv_conv2d_relu" - top: 
"mixed_1_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_1_conv_1_conv2d" - top: "mixed_1_tower_1_conv_1_conv2d_bn" -} -layer { - name: "mixed_1_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_1_tower_1_conv_1_conv2d_bn" - top: "mixed_1_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_1_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_1_tower_1_conv_1_conv2d_relu" - top: "mixed_1_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_1_conv_2_conv2d" - top: "mixed_1_tower_1_conv_2_conv2d_bn" -} -layer { - name: "mixed_1_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_1_tower_1_conv_2_conv2d_bn" - top: "mixed_1_tower_1_conv_2_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_1_pool" - type: "Pooling" - bottom: "ch_concat_mixed_chconcat" - top: "AVE_pool_mixed_1_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_1_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_1_pool" - top: "mixed_1_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_2_conv_conv2d" - top: "mixed_1_tower_2_conv_conv2d_bn" -} -layer { - name: "mixed_1_tower_2_conv_relu" - type: "ReLU" - bottom: 
"mixed_1_tower_2_conv_conv2d_bn" - top: "mixed_1_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_1_chconcat" - type: "Concat" - bottom: "mixed_1_conv_conv2d_relu" - bottom: "mixed_1_tower_conv_1_conv2d_relu" - bottom: "mixed_1_tower_1_conv_2_conv2d_relu" - bottom: "mixed_1_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_1_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_2_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_1_chconcat" - top: "mixed_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_conv_conv2d" - top: "mixed_2_conv_conv2d_bn" -} -layer { - name: "mixed_2_conv_relu" - type: "ReLU" - bottom: "mixed_2_conv_conv2d_bn" - top: "mixed_2_conv_conv2d_relu" -} -layer { - name: "mixed_2_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_1_chconcat" - top: "mixed_2_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 48 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_tower_conv_conv2d" - top: "mixed_2_tower_conv_conv2d_bn" -} -layer { - name: "mixed_2_tower_conv_relu" - type: "ReLU" - bottom: "mixed_2_tower_conv_conv2d_bn" - top: "mixed_2_tower_conv_conv2d_relu" -} -layer { - name: "mixed_2_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_2_tower_conv_conv2d_relu" - top: "mixed_2_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: 
"mixed_2_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_tower_conv_1_conv2d" - top: "mixed_2_tower_conv_1_conv2d_bn" -} -layer { - name: "mixed_2_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_2_tower_conv_1_conv2d_bn" - top: "mixed_2_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_2_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_1_chconcat" - top: "mixed_2_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_tower_1_conv_conv2d" - top: "mixed_2_tower_1_conv_conv2d_bn" -} -layer { - name: "mixed_2_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_2_tower_1_conv_conv2d_bn" - top: "mixed_2_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_2_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_2_tower_1_conv_conv2d_relu" - top: "mixed_2_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_tower_1_conv_1_conv2d" - top: "mixed_2_tower_1_conv_1_conv2d_bn" -} -layer { - name: "mixed_2_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_2_tower_1_conv_1_conv2d_bn" - top: "mixed_2_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_2_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_2_tower_1_conv_1_conv2d_relu" - top: "mixed_2_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: 
"mixed_2_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_tower_1_conv_2_conv2d" - top: "mixed_2_tower_1_conv_2_conv2d_bn" -} -layer { - name: "mixed_2_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_2_tower_1_conv_2_conv2d_bn" - top: "mixed_2_tower_1_conv_2_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_2_pool" - type: "Pooling" - bottom: "ch_concat_mixed_1_chconcat" - top: "AVE_pool_mixed_2_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_2_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_2_pool" - top: "mixed_2_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_tower_2_conv_conv2d" - top: "mixed_2_tower_2_conv_conv2d_bn" -} -layer { - name: "mixed_2_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_2_tower_2_conv_conv2d_bn" - top: "mixed_2_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_2_chconcat" - type: "Concat" - bottom: "mixed_2_conv_conv2d_relu" - bottom: "mixed_2_tower_conv_1_conv2d_relu" - bottom: "mixed_2_tower_1_conv_2_conv2d_relu" - bottom: "mixed_2_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_2_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_3_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_2_chconcat" - top: "mixed_3_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 2 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_3_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_3_conv_conv2d" - top: "mixed_3_conv_conv2d_bn" -} -layer { - name: "mixed_3_conv_relu" - type: "ReLU" - bottom: "mixed_3_conv_conv2d_bn" 
- top: "mixed_3_conv_conv2d_relu" -} -layer { - name: "mixed_3_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_2_chconcat" - top: "mixed_3_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_3_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_3_tower_conv_conv2d" - top: "mixed_3_tower_conv_conv2d_bn" -} -layer { - name: "mixed_3_tower_conv_relu" - type: "ReLU" - bottom: "mixed_3_tower_conv_conv2d_bn" - top: "mixed_3_tower_conv_conv2d_relu" -} -layer { - name: "mixed_3_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_3_tower_conv_conv2d_relu" - top: "mixed_3_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_3_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_3_tower_conv_1_conv2d" - top: "mixed_3_tower_conv_1_conv2d_bn" -} -layer { - name: "mixed_3_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_3_tower_conv_1_conv2d_bn" - top: "mixed_3_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_3_tower_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_3_tower_conv_1_conv2d_relu" - top: "mixed_3_tower_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 2 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_3_tower_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_3_tower_conv_2_conv2d" - top: "mixed_3_tower_conv_2_conv2d_bn" -} -layer { - name: "mixed_3_tower_conv_2_relu" - type: "ReLU" - bottom: "mixed_3_tower_conv_2_conv2d_bn" - top: "mixed_3_tower_conv_2_conv2d_relu" -} -layer { - name: 
"max_pool_mixed_3_pool" - type: "Pooling" - bottom: "ch_concat_mixed_2_chconcat" - top: "max_pool_mixed_3_pool" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - pad: 0 - } -} -layer { - name: "ch_concat_mixed_3_chconcat" - type: "Concat" - bottom: "max_pool_mixed_3_pool" - bottom: "mixed_3_conv_conv2d_relu" - bottom: "mixed_3_tower_conv_2_conv2d_relu" - top: "ch_concat_mixed_3_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_4_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_3_chconcat" - top: "mixed_4_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_4_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_conv_conv2d" - top: "mixed_4_conv_conv2d_bn" -} -layer { - name: "mixed_4_conv_relu" - type: "ReLU" - bottom: "mixed_4_conv_conv2d_bn" - top: "mixed_4_conv_conv2d_relu" -} -layer { - name: "mixed_4_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_3_chconcat" - top: "mixed_4_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_4_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_conv_conv2d" - top: "mixed_4_tower_conv_conv2d_bn" -} -layer { - name: "mixed_4_tower_conv_relu" - type: "ReLU" - bottom: "mixed_4_tower_conv_conv2d_bn" - top: "mixed_4_tower_conv_conv2d_relu" -} -layer { - name: "mixed_4_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_4_tower_conv_conv2d_relu" - top: "mixed_4_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - 
kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_4_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_conv_1_conv2d" - top: "mixed_4_tower_conv_1_conv2d_bn" -} -layer { - name: "mixed_4_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_4_tower_conv_1_conv2d_bn" - top: "mixed_4_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_4_tower_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_4_tower_conv_1_conv2d_relu" - top: "mixed_4_tower_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_4_tower_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_conv_2_conv2d" - top: "mixed_4_tower_conv_2_conv2d_bn" -} -layer { - name: "mixed_4_tower_conv_2_relu" - type: "ReLU" - bottom: "mixed_4_tower_conv_2_conv2d_bn" - top: "mixed_4_tower_conv_2_conv2d_relu" -} -layer { - name: "mixed_4_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_3_chconcat" - top: "mixed_4_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_4_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_1_conv_conv2d" - top: "mixed_4_tower_1_conv_conv2d_bn" -} -layer { - name: "mixed_4_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_4_tower_1_conv_conv2d_bn" - top: "mixed_4_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_4_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_4_tower_1_conv_conv2d_relu" - top: "mixed_4_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 
3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_4_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_1_conv_1_conv2d" - top: "mixed_4_tower_1_conv_1_conv2d_bn" -} -layer { - name: "mixed_4_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_4_tower_1_conv_1_conv2d_bn" - top: "mixed_4_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_4_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_4_tower_1_conv_1_conv2d_relu" - top: "mixed_4_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_4_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_1_conv_2_conv2d" - top: "mixed_4_tower_1_conv_2_conv2d_bn" -} -layer { - name: "mixed_4_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_4_tower_1_conv_2_conv2d_bn" - top: "mixed_4_tower_1_conv_2_conv2d_relu" -} -layer { - name: "mixed_4_tower_1_conv_3_conv2d" - type: "Convolution" - bottom: "mixed_4_tower_1_conv_2_conv2d_relu" - top: "mixed_4_tower_1_conv_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_4_tower_1_conv_3_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_1_conv_3_conv2d" - top: "mixed_4_tower_1_conv_3_conv2d_bn" -} -layer { - name: "mixed_4_tower_1_conv_3_relu" - type: "ReLU" - bottom: "mixed_4_tower_1_conv_3_conv2d_bn" - top: "mixed_4_tower_1_conv_3_conv2d_relu" -} -layer { - name: "mixed_4_tower_1_conv_4_conv2d" - type: "Convolution" - bottom: "mixed_4_tower_1_conv_3_conv2d_relu" - top: "mixed_4_tower_1_conv_4_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 
192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_4_tower_1_conv_4_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_1_conv_4_conv2d" - top: "mixed_4_tower_1_conv_4_conv2d_bn" -} -layer { - name: "mixed_4_tower_1_conv_4_relu" - type: "ReLU" - bottom: "mixed_4_tower_1_conv_4_conv2d_bn" - top: "mixed_4_tower_1_conv_4_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_4_pool" - type: "Pooling" - bottom: "ch_concat_mixed_3_chconcat" - top: "AVE_pool_mixed_4_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_4_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_4_pool" - top: "mixed_4_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_4_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_2_conv_conv2d" - top: "mixed_4_tower_2_conv_conv2d_bn" -} -layer { - name: "mixed_4_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_4_tower_2_conv_conv2d_bn" - top: "mixed_4_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_4_chconcat" - type: "Concat" - bottom: "mixed_4_conv_conv2d_relu" - bottom: "mixed_4_tower_conv_2_conv2d_relu" - bottom: "mixed_4_tower_1_conv_4_conv2d_relu" - bottom: "mixed_4_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_4_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_5_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_4_chconcat" - top: "mixed_5_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_5_conv_batchnorm" - type: 
"BatchNorm" - bottom: "mixed_5_conv_conv2d" - top: "mixed_5_conv_conv2d_bn" -} -layer { - name: "mixed_5_conv_relu" - type: "ReLU" - bottom: "mixed_5_conv_conv2d_bn" - top: "mixed_5_conv_conv2d_relu" -} -layer { - name: "mixed_5_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_4_chconcat" - top: "mixed_5_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_5_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_conv_conv2d" - top: "mixed_5_tower_conv_conv2d_bn" -} -layer { - name: "mixed_5_tower_conv_relu" - type: "ReLU" - bottom: "mixed_5_tower_conv_conv2d_bn" - top: "mixed_5_tower_conv_conv2d_relu" -} -layer { - name: "mixed_5_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_5_tower_conv_conv2d_relu" - top: "mixed_5_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_5_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_conv_1_conv2d" - top: "mixed_5_tower_conv_1_conv2d_bn" -} -layer { - name: "mixed_5_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_5_tower_conv_1_conv2d_bn" - top: "mixed_5_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_5_tower_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_5_tower_conv_1_conv2d_relu" - top: "mixed_5_tower_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_5_tower_conv_2_batchnorm" - type: "BatchNorm" - bottom: 
"mixed_5_tower_conv_2_conv2d" - top: "mixed_5_tower_conv_2_conv2d_bn" -} -layer { - name: "mixed_5_tower_conv_2_relu" - type: "ReLU" - bottom: "mixed_5_tower_conv_2_conv2d_bn" - top: "mixed_5_tower_conv_2_conv2d_relu" -} -layer { - name: "mixed_5_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_4_chconcat" - top: "mixed_5_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_5_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_1_conv_conv2d" - top: "mixed_5_tower_1_conv_conv2d_bn" -} -layer { - name: "mixed_5_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_5_tower_1_conv_conv2d_bn" - top: "mixed_5_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_5_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_5_tower_1_conv_conv2d_relu" - top: "mixed_5_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_5_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_1_conv_1_conv2d" - top: "mixed_5_tower_1_conv_1_conv2d_bn" -} -layer { - name: "mixed_5_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_5_tower_1_conv_1_conv2d_bn" - top: "mixed_5_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_5_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_5_tower_1_conv_1_conv2d_relu" - top: "mixed_5_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: 
"mixed_5_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_1_conv_2_conv2d" - top: "mixed_5_tower_1_conv_2_conv2d_bn" -} -layer { - name: "mixed_5_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_5_tower_1_conv_2_conv2d_bn" - top: "mixed_5_tower_1_conv_2_conv2d_relu" -} -layer { - name: "mixed_5_tower_1_conv_3_conv2d" - type: "Convolution" - bottom: "mixed_5_tower_1_conv_2_conv2d_relu" - top: "mixed_5_tower_1_conv_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_5_tower_1_conv_3_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_1_conv_3_conv2d" - top: "mixed_5_tower_1_conv_3_conv2d_bn" -} -layer { - name: "mixed_5_tower_1_conv_3_relu" - type: "ReLU" - bottom: "mixed_5_tower_1_conv_3_conv2d_bn" - top: "mixed_5_tower_1_conv_3_conv2d_relu" -} -layer { - name: "mixed_5_tower_1_conv_4_conv2d" - type: "Convolution" - bottom: "mixed_5_tower_1_conv_3_conv2d_relu" - top: "mixed_5_tower_1_conv_4_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_5_tower_1_conv_4_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_1_conv_4_conv2d" - top: "mixed_5_tower_1_conv_4_conv2d_bn" -} -layer { - name: "mixed_5_tower_1_conv_4_relu" - type: "ReLU" - bottom: "mixed_5_tower_1_conv_4_conv2d_bn" - top: "mixed_5_tower_1_conv_4_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_5_pool" - type: "Pooling" - bottom: "ch_concat_mixed_4_chconcat" - top: "AVE_pool_mixed_5_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_5_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_5_pool" 
- top: "mixed_5_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_5_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_2_conv_conv2d" - top: "mixed_5_tower_2_conv_conv2d_bn" -} -layer { - name: "mixed_5_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_5_tower_2_conv_conv2d_bn" - top: "mixed_5_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_5_chconcat" - type: "Concat" - bottom: "mixed_5_conv_conv2d_relu" - bottom: "mixed_5_tower_conv_2_conv2d_relu" - bottom: "mixed_5_tower_1_conv_4_conv2d_relu" - bottom: "mixed_5_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_5_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_6_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_5_chconcat" - top: "mixed_6_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_6_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_conv_conv2d" - top: "mixed_6_conv_conv2d_bn" -} -layer { - name: "mixed_6_conv_relu" - type: "ReLU" - bottom: "mixed_6_conv_conv2d_bn" - top: "mixed_6_conv_conv2d_relu" -} -layer { - name: "mixed_6_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_5_chconcat" - top: "mixed_6_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_6_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_conv_conv2d" - top: "mixed_6_tower_conv_conv2d_bn" -} -layer { - name: "mixed_6_tower_conv_relu" - type: "ReLU" - bottom: 
"mixed_6_tower_conv_conv2d_bn" - top: "mixed_6_tower_conv_conv2d_relu" -} -layer { - name: "mixed_6_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_conv_conv2d_relu" - top: "mixed_6_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_6_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_conv_1_conv2d" - top: "mixed_6_tower_conv_1_conv2d_bn" -} -layer { - name: "mixed_6_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_6_tower_conv_1_conv2d_bn" - top: "mixed_6_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_6_tower_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_conv_1_conv2d_relu" - top: "mixed_6_tower_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_6_tower_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_conv_2_conv2d" - top: "mixed_6_tower_conv_2_conv2d_bn" -} -layer { - name: "mixed_6_tower_conv_2_relu" - type: "ReLU" - bottom: "mixed_6_tower_conv_2_conv2d_bn" - top: "mixed_6_tower_conv_2_conv2d_relu" -} -layer { - name: "mixed_6_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_5_chconcat" - top: "mixed_6_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_6_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_1_conv_conv2d" - top: "mixed_6_tower_1_conv_conv2d_bn" -} -layer { - name: "mixed_6_tower_1_conv_relu" - type: "ReLU" - 
bottom: "mixed_6_tower_1_conv_conv2d_bn" - top: "mixed_6_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_6_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_1_conv_conv2d_relu" - top: "mixed_6_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_6_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_1_conv_1_conv2d" - top: "mixed_6_tower_1_conv_1_conv2d_bn" -} -layer { - name: "mixed_6_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_6_tower_1_conv_1_conv2d_bn" - top: "mixed_6_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_6_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_1_conv_1_conv2d_relu" - top: "mixed_6_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_6_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_1_conv_2_conv2d" - top: "mixed_6_tower_1_conv_2_conv2d_bn" -} -layer { - name: "mixed_6_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_6_tower_1_conv_2_conv2d_bn" - top: "mixed_6_tower_1_conv_2_conv2d_relu" -} -layer { - name: "mixed_6_tower_1_conv_3_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_1_conv_2_conv2d_relu" - top: "mixed_6_tower_1_conv_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_6_tower_1_conv_3_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_1_conv_3_conv2d" - top: 
"mixed_6_tower_1_conv_3_conv2d_bn" -} -layer { - name: "mixed_6_tower_1_conv_3_relu" - type: "ReLU" - bottom: "mixed_6_tower_1_conv_3_conv2d_bn" - top: "mixed_6_tower_1_conv_3_conv2d_relu" -} -layer { - name: "mixed_6_tower_1_conv_4_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_1_conv_3_conv2d_relu" - top: "mixed_6_tower_1_conv_4_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_6_tower_1_conv_4_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_1_conv_4_conv2d" - top: "mixed_6_tower_1_conv_4_conv2d_bn" -} -layer { - name: "mixed_6_tower_1_conv_4_relu" - type: "ReLU" - bottom: "mixed_6_tower_1_conv_4_conv2d_bn" - top: "mixed_6_tower_1_conv_4_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_6_pool" - type: "Pooling" - bottom: "ch_concat_mixed_5_chconcat" - top: "AVE_pool_mixed_6_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_6_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_6_pool" - top: "mixed_6_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_6_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_2_conv_conv2d" - top: "mixed_6_tower_2_conv_conv2d_bn" -} -layer { - name: "mixed_6_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_6_tower_2_conv_conv2d_bn" - top: "mixed_6_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_6_chconcat" - type: "Concat" - bottom: "mixed_6_conv_conv2d_relu" - bottom: "mixed_6_tower_conv_2_conv2d_relu" - bottom: "mixed_6_tower_1_conv_4_conv2d_relu" - bottom: "mixed_6_tower_2_conv_conv2d_relu" - top: 
"ch_concat_mixed_6_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_7_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_6_chconcat" - top: "mixed_7_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_7_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_conv_conv2d" - top: "mixed_7_conv_conv2d_bn" -} -layer { - name: "mixed_7_conv_relu" - type: "ReLU" - bottom: "mixed_7_conv_conv2d_bn" - top: "mixed_7_conv_conv2d_relu" -} -layer { - name: "mixed_7_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_6_chconcat" - top: "mixed_7_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_7_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_conv_conv2d" - top: "mixed_7_tower_conv_conv2d_bn" -} -layer { - name: "mixed_7_tower_conv_relu" - type: "ReLU" - bottom: "mixed_7_tower_conv_conv2d_bn" - top: "mixed_7_tower_conv_conv2d_relu" -} -layer { - name: "mixed_7_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_7_tower_conv_conv2d_relu" - top: "mixed_7_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_7_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_conv_1_conv2d" - top: "mixed_7_tower_conv_1_conv2d_bn" -} -layer { - name: "mixed_7_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_7_tower_conv_1_conv2d_bn" - top: "mixed_7_tower_conv_1_conv2d_relu" -} -layer { - name: 
"mixed_7_tower_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_7_tower_conv_1_conv2d_relu" - top: "mixed_7_tower_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_7_tower_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_conv_2_conv2d" - top: "mixed_7_tower_conv_2_conv2d_bn" -} -layer { - name: "mixed_7_tower_conv_2_relu" - type: "ReLU" - bottom: "mixed_7_tower_conv_2_conv2d_bn" - top: "mixed_7_tower_conv_2_conv2d_relu" -} -layer { - name: "mixed_7_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_6_chconcat" - top: "mixed_7_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_7_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_1_conv_conv2d" - top: "mixed_7_tower_1_conv_conv2d_bn" -} -layer { - name: "mixed_7_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_7_tower_1_conv_conv2d_bn" - top: "mixed_7_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_7_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_7_tower_1_conv_conv2d_relu" - top: "mixed_7_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_7_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_1_conv_1_conv2d" - top: "mixed_7_tower_1_conv_1_conv2d_bn" -} -layer { - name: "mixed_7_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_7_tower_1_conv_1_conv2d_bn" - top: 
"mixed_7_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_7_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_7_tower_1_conv_1_conv2d_relu" - top: "mixed_7_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_7_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_1_conv_2_conv2d" - top: "mixed_7_tower_1_conv_2_conv2d_bn" -} -layer { - name: "mixed_7_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_7_tower_1_conv_2_conv2d_bn" - top: "mixed_7_tower_1_conv_2_conv2d_relu" -} -layer { - name: "mixed_7_tower_1_conv_3_conv2d" - type: "Convolution" - bottom: "mixed_7_tower_1_conv_2_conv2d_relu" - top: "mixed_7_tower_1_conv_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_7_tower_1_conv_3_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_1_conv_3_conv2d" - top: "mixed_7_tower_1_conv_3_conv2d_bn" -} -layer { - name: "mixed_7_tower_1_conv_3_relu" - type: "ReLU" - bottom: "mixed_7_tower_1_conv_3_conv2d_bn" - top: "mixed_7_tower_1_conv_3_conv2d_relu" -} -layer { - name: "mixed_7_tower_1_conv_4_conv2d" - type: "Convolution" - bottom: "mixed_7_tower_1_conv_3_conv2d_relu" - top: "mixed_7_tower_1_conv_4_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_7_tower_1_conv_4_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_1_conv_4_conv2d" - top: "mixed_7_tower_1_conv_4_conv2d_bn" -} -layer { - name: 
"mixed_7_tower_1_conv_4_relu" - type: "ReLU" - bottom: "mixed_7_tower_1_conv_4_conv2d_bn" - top: "mixed_7_tower_1_conv_4_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_7_pool" - type: "Pooling" - bottom: "ch_concat_mixed_6_chconcat" - top: "AVE_pool_mixed_7_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_7_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_7_pool" - top: "mixed_7_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_7_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_2_conv_conv2d" - top: "mixed_7_tower_2_conv_conv2d_bn" -} -layer { - name: "mixed_7_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_7_tower_2_conv_conv2d_bn" - top: "mixed_7_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_7_chconcat" - type: "Concat" - bottom: "mixed_7_conv_conv2d_relu" - bottom: "mixed_7_tower_conv_2_conv2d_relu" - bottom: "mixed_7_tower_1_conv_4_conv2d_relu" - bottom: "mixed_7_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_7_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_8_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_7_chconcat" - top: "mixed_8_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_8_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_conv_conv2d" - top: "mixed_8_tower_conv_conv2d_bn" -} -layer { - name: "mixed_8_tower_conv_relu" - type: "ReLU" - bottom: "mixed_8_tower_conv_conv2d_bn" - top: "mixed_8_tower_conv_conv2d_relu" -} -layer { - name: "mixed_8_tower_conv_1_conv2d" - type: "Convolution" - 
bottom: "mixed_8_tower_conv_conv2d_relu" - top: "mixed_8_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 320 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 2 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_8_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_conv_1_conv2d" - top: "mixed_8_tower_conv_1_conv2d_bn" -} -layer { - name: "mixed_8_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_8_tower_conv_1_conv2d_bn" - top: "mixed_8_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_8_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_7_chconcat" - top: "mixed_8_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_8_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_1_conv_conv2d" - top: "mixed_8_tower_1_conv_conv2d_bn" -} -layer { - name: "mixed_8_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_8_tower_1_conv_conv2d_bn" - top: "mixed_8_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_8_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_8_tower_1_conv_conv2d_relu" - top: "mixed_8_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_8_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_1_conv_1_conv2d" - top: "mixed_8_tower_1_conv_1_conv2d_bn" -} -layer { - name: "mixed_8_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_8_tower_1_conv_1_conv2d_bn" - top: "mixed_8_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_8_tower_1_conv_2_conv2d" - type: "Convolution" 
- bottom: "mixed_8_tower_1_conv_1_conv2d_relu" - top: "mixed_8_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_8_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_1_conv_2_conv2d" - top: "mixed_8_tower_1_conv_2_conv2d_bn" -} -layer { - name: "mixed_8_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_8_tower_1_conv_2_conv2d_bn" - top: "mixed_8_tower_1_conv_2_conv2d_relu" -} -layer { - name: "mixed_8_tower_1_conv_3_conv2d" - type: "Convolution" - bottom: "mixed_8_tower_1_conv_2_conv2d_relu" - top: "mixed_8_tower_1_conv_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 2 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_8_tower_1_conv_3_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_1_conv_3_conv2d" - top: "mixed_8_tower_1_conv_3_conv2d_bn" -} -layer { - name: "mixed_8_tower_1_conv_3_relu" - type: "ReLU" - bottom: "mixed_8_tower_1_conv_3_conv2d_bn" - top: "mixed_8_tower_1_conv_3_conv2d_relu" -} -layer { - name: "MAX_pool_mixed_8_pool" - type: "Pooling" - bottom: "ch_concat_mixed_7_chconcat" - top: "MAX_pool_mixed_8_pool" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - pad: 0 - } -} -layer { - name: "ch_concat_mixed_8_chconcat" - type: "Concat" - bottom: "mixed_8_tower_conv_1_conv2d_relu" - bottom: "mixed_8_tower_1_conv_3_conv2d_relu" - bottom: "MAX_pool_mixed_8_pool" - top: "ch_concat_mixed_8_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_9_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_8_chconcat" - top: "mixed_9_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 320 - bias_term: false 
- pad: 0 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - kernel_h: 1 - kernel_w: 1 - } -} -layer { - name: "mixed_9_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_conv_conv2d" - top: "mixed_9_conv_conv2d_bn" -} -layer { - name: "mixed_9_conv_relu" - type: "ReLU" - bottom: "mixed_9_conv_conv2d_bn" - top: "mixed_9_conv_conv2d_relu" -} -layer { - name: "mixed_9_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_8_chconcat" - top: "mixed_9_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_9_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_conv_conv2d" - top: "mixed_9_tower_conv_conv2d_bn" -} -layer { - name: "mixed_9_tower_conv_relu" - type: "ReLU" - bottom: "mixed_9_tower_conv_conv2d_bn" - top: "mixed_9_tower_conv_conv2d_relu" -} -layer { - name: "mixed_9_tower_mixed_conv_conv2d" - type: "Convolution" - bottom: "mixed_9_tower_conv_conv2d_relu" - top: "mixed_9_tower_mixed_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 1 - kernel_h: 1 - kernel_w: 3 - } -} -layer { - name: "mixed_9_tower_mixed_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_mixed_conv_conv2d" - top: "mixed_9_tower_mixed_conv_conv2d_bn" -} -layer { - name: "mixed_9_tower_mixed_conv_relu" - type: "ReLU" - bottom: "mixed_9_tower_mixed_conv_conv2d_bn" - top: "mixed_9_tower_mixed_conv_conv2d_relu" -} -layer { - name: "mixed_9_tower_mixed_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_9_tower_conv_conv2d_relu" - top: "mixed_9_tower_mixed_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - 
weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 1 - pad_w: 0 - kernel_h: 3 - kernel_w: 1 - } -} -layer { - name: "mixed_9_tower_mixed_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_mixed_conv_1_conv2d" - top: "mixed_9_tower_mixed_conv_1_conv2d_bn" -} -layer { - name: "mixed_9_tower_mixed_conv_1_relu" - type: "ReLU" - bottom: "mixed_9_tower_mixed_conv_1_conv2d_bn" - top: "mixed_9_tower_mixed_conv_1_conv2d_relu" -} -layer { - name: "mixed_9_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_8_chconcat" - top: "mixed_9_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 448 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_9_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_1_conv_conv2d" - top: "mixed_9_tower_1_conv_conv2d_bn" -} -layer { - name: "mixed_9_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_9_tower_1_conv_conv2d_bn" - top: "mixed_9_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_9_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_9_tower_1_conv_conv2d_relu" - top: "mixed_9_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_9_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_1_conv_1_conv2d" - top: "mixed_9_tower_1_conv_1_conv2d_bn" -} -layer { - name: "mixed_9_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_9_tower_1_conv_1_conv2d_bn" - top: "mixed_9_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_9_tower_1_mixed_conv_conv2d" - type: "Convolution" - bottom: "mixed_9_tower_1_conv_1_conv2d_relu" - top: "mixed_9_tower_1_mixed_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param 
{ - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 1 - kernel_h: 1 - kernel_w: 3 - } -} -layer { - name: "mixed_9_tower_1_mixed_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_1_mixed_conv_conv2d" - top: "mixed_9_tower_1_mixed_conv_conv2d_bn" -} -layer { - name: "mixed_9_tower_1_mixed_conv_relu" - type: "ReLU" - bottom: "mixed_9_tower_1_mixed_conv_conv2d_bn" - top: "mixed_9_tower_1_mixed_conv_conv2d_relu" -} -layer { - name: "mixed_9_tower_1_mixed_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_9_tower_1_conv_1_conv2d_relu" - top: "mixed_9_tower_1_mixed_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 1 - pad_w: 0 - kernel_h: 3 - kernel_w: 1 - } -} -layer { - name: "mixed_9_tower_1_mixed_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_1_mixed_conv_1_conv2d" - top: "mixed_9_tower_1_mixed_conv_1_conv2d_bn" -} -layer { - name: "mixed_9_tower_1_mixed_conv_1_relu" - type: "ReLU" - bottom: "mixed_9_tower_1_mixed_conv_1_conv2d_bn" - top: "mixed_9_tower_1_mixed_conv_1_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_9_pool" - type: "Pooling" - bottom: "ch_concat_mixed_8_chconcat" - top: "AVE_pool_mixed_9_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_9_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_9_pool" - top: "mixed_9_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_9_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_2_conv_conv2d" - top: "mixed_9_tower_2_conv_conv2d_bn" -} -layer { - name: "mixed_9_tower_2_conv_relu" - type: 
"ReLU" - bottom: "mixed_9_tower_2_conv_conv2d_bn" - top: "mixed_9_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_9_chconcat" - type: "Concat" - bottom: "mixed_9_conv_conv2d_relu" - bottom: "mixed_9_tower_mixed_conv_conv2d_relu" - bottom: "mixed_9_tower_mixed_conv_1_conv2d_relu" - bottom: "mixed_9_tower_1_mixed_conv_conv2d_relu" - bottom: "mixed_9_tower_1_mixed_conv_1_conv2d_relu" - bottom: "mixed_9_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_9_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_10_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_9_chconcat" - top: "mixed_10_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 320 - bias_term: false - pad: 0 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - kernel_h: 1 - kernel_w: 1 - } -} -layer { - name: "mixed_10_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_conv_conv2d" - top: "mixed_10_conv_conv2d_bn" -} -layer { - name: "mixed_10_conv_relu" - type: "ReLU" - bottom: "mixed_10_conv_conv2d_bn" - top: "mixed_10_conv_conv2d_relu" -} -layer { - name: "mixed_10_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_9_chconcat" - top: "mixed_10_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_10_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_conv_conv2d" - top: "mixed_10_tower_conv_conv2d_bn" -} -layer { - name: "mixed_10_tower_conv_relu" - type: "ReLU" - bottom: "mixed_10_tower_conv_conv2d_bn" - top: "mixed_10_tower_conv_conv2d_relu" -} -layer { - name: "mixed_10_tower_mixed_conv_conv2d" - type: "Convolution" - bottom: "mixed_10_tower_conv_conv2d_relu" - top: "mixed_10_tower_mixed_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - 
num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 1 - kernel_h: 1 - kernel_w: 3 - } -} -layer { - name: "mixed_10_tower_mixed_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_mixed_conv_conv2d" - top: "mixed_10_tower_mixed_conv_conv2d_bn" -} -layer { - name: "mixed_10_tower_mixed_conv_relu" - type: "ReLU" - bottom: "mixed_10_tower_mixed_conv_conv2d_bn" - top: "mixed_10_tower_mixed_conv_conv2d_relu" -} -layer { - name: "mixed_10_tower_mixed_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_10_tower_conv_conv2d_relu" - top: "mixed_10_tower_mixed_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 1 - pad_w: 0 - kernel_h: 3 - kernel_w: 1 - } -} -layer { - name: "mixed_10_tower_mixed_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_mixed_conv_1_conv2d" - top: "mixed_10_tower_mixed_conv_1_conv2d_bn" -} -layer { - name: "mixed_10_tower_mixed_conv_1_relu" - type: "ReLU" - bottom: "mixed_10_tower_mixed_conv_1_conv2d_bn" - top: "mixed_10_tower_mixed_conv_1_conv2d_relu" -} -layer { - name: "mixed_10_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_9_chconcat" - top: "mixed_10_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 448 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_10_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_1_conv_conv2d" - top: "mixed_10_tower_1_conv_conv2d_bn" -} -layer { - name: "mixed_10_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_10_tower_1_conv_conv2d_bn" - top: "mixed_10_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_10_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_10_tower_1_conv_conv2d_relu" 
- top: "mixed_10_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_10_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_1_conv_1_conv2d" - top: "mixed_10_tower_1_conv_1_conv2d_bn" -} -layer { - name: "mixed_10_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_10_tower_1_conv_1_conv2d_bn" - top: "mixed_10_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_10_tower_1_mixed_conv_conv2d" - type: "Convolution" - bottom: "mixed_10_tower_1_conv_1_conv2d_relu" - top: "mixed_10_tower_1_mixed_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 1 - kernel_h: 1 - kernel_w: 3 - } -} -layer { - name: "mixed_10_tower_1_mixed_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_1_mixed_conv_conv2d" - top: "mixed_10_tower_1_mixed_conv_conv2d_bn" -} -layer { - name: "mixed_10_tower_1_mixed_conv_relu" - type: "ReLU" - bottom: "mixed_10_tower_1_mixed_conv_conv2d_bn" - top: "mixed_10_tower_1_mixed_conv_conv2d_relu" -} -layer { - name: "mixed_10_tower_1_mixed_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_10_tower_1_conv_1_conv2d_relu" - top: "mixed_10_tower_1_mixed_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 1 - pad_w: 0 - kernel_h: 3 - kernel_w: 1 - } -} -layer { - name: "mixed_10_tower_1_mixed_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_1_mixed_conv_1_conv2d" - top: "mixed_10_tower_1_mixed_conv_1_conv2d_bn" -} -layer { - name: "mixed_10_tower_1_mixed_conv_1_relu" - type: "ReLU" - bottom: 
"mixed_10_tower_1_mixed_conv_1_conv2d_bn" - top: "mixed_10_tower_1_mixed_conv_1_conv2d_relu" -} -layer { - name: "MAX_pool_mixed_10_pool" - type: "Pooling" - bottom: "ch_concat_mixed_9_chconcat" - top: "MAX_pool_mixed_10_pool" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_10_tower_2_conv_conv2d" - type: "Convolution" - bottom: "MAX_pool_mixed_10_pool" - top: "mixed_10_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_10_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_2_conv_conv2d" - top: "mixed_10_tower_2_conv_conv2d_bn" -} -layer { - name: "mixed_10_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_10_tower_2_conv_conv2d_bn" - top: "mixed_10_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_10_chconcat" - type: "Concat" - bottom: "mixed_10_conv_conv2d_relu" - bottom: "mixed_10_tower_mixed_conv_conv2d_relu" - bottom: "mixed_10_tower_mixed_conv_1_conv2d_relu" - bottom: "mixed_10_tower_1_mixed_conv_conv2d_relu" - bottom: "mixed_10_tower_1_mixed_conv_1_conv2d_relu" - bottom: "mixed_10_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_10_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "global_pool" - type: "Pooling" - bottom: "ch_concat_mixed_10_chconcat" - top: "global_pool" - pooling_param { - pool: AVE - kernel_size: 8 - stride: 1 - pad: 0 - } -} -layer { - name: "drop" - type: "Dropout" - bottom: "global_pool" - top: "global_pool" - dropout_param { - dropout_ratio: 0.8 - } -} -layer { - name: "flatten" - type: "Flatten" - bottom: "global_pool" - top: "flatten" -} -layer { - name: "fc1" - type: "InnerProduct" - bottom: "flatten" - top: "fc1" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - inner_product_param { - 
num_output: 1000 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "loss" - type: "SoftmaxWithLoss" - bottom: "fc1" - bottom: "label" - top: "loss" -} -layer { - name: "loss3/top-1" - type: "Accuracy" - bottom: "fc1" - bottom: "label" - top: "loss3/top-1" - include { - phase: TEST - } -} -layer { - name: "loss3/top-5" - type: "Accuracy" - bottom: "fc1" - bottom: "label" - top: "loss3/top-5" - include { - phase: TEST - } - accuracy_param { - top_k: 5 - } -} \ No newline at end of file diff --git a/models/intel_optimized_models/googlenet_v3/mkldnn/nodata_solver.prototxt b/models/intel_optimized_models/googlenet_v3/mkldnn/nodata_solver.prototxt deleted file mode 100644 index f143ca98f..000000000 --- a/models/intel_optimized_models/googlenet_v3/mkldnn/nodata_solver.prototxt +++ /dev/null @@ -1,16 +0,0 @@ -net: "models/intel_optimized_models/googlenet_v3/mkldnn/nodata_train_val.prototxt" -test_iter: 1000 -test_interval: 10000 -test_initialization: false -display: 40 -average_loss: 40 -base_lr: 0.045 -lr_policy: "step" -stepsize: 6400 -gamma: 0.96 -max_iter: 1200000 -momentum: 0.9 -weight_decay: 0.0002 -snapshot: 50000 -snapshot_prefix: "models/intel_optimized_models/googlenet_v3/mkldnn/googlenet_v3_mkldnn_nodata" -solver_mode: CPU diff --git a/models/intel_optimized_models/googlenet_v3/mkldnn/nodata_train_val.prototxt b/models/intel_optimized_models/googlenet_v3/mkldnn/nodata_train_val.prototxt deleted file mode 100644 index aa56e7dfc..000000000 --- a/models/intel_optimized_models/googlenet_v3/mkldnn/nodata_train_val.prototxt +++ /dev/null @@ -1,3577 +0,0 @@ -name: "InceptionV3" - -layer { - name: "data" - type: "DummyData" - top: "data" - include { - phase: TRAIN - } - dummy_data_param { - shape: { dim: 22 dim: 3 dim: 299 dim: 299 } - data_filler { - type: "constant" - value: 0.01 - } - } -} -layer { - name: "data" - type: "DummyData" - top: "label" - include { - phase: TRAIN - } - dummy_data_param { - 
shape: { dim: 22 } - data_filler { - type: "constant" - } - } -} -#-------------------- - - -layer { - name: "conv_conv2d" - type: "Convolution" - bottom: "data" - top: "conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 32 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 2 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "conv_batchnorm" - type: "BatchNorm" - bottom: "conv_conv2d" - top: "conv_conv2d_bn" -} -layer { - name: "conv_relu" - type: "ReLU" - bottom: "conv_conv2d_bn" - top: "conv_conv2d_relu" -} -layer { - name: "conv_1_1_conv2d" - type: "Convolution" - bottom: "conv_conv2d_relu" - top: "conv_1_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 32 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "conv_1_1_batchnorm" - type: "BatchNorm" - bottom: "conv_1_1_conv2d" - top: "conv_1_1_conv2d_bn" -} -layer { - name: "conv_1_1_relu" - type: "ReLU" - bottom: "conv_1_1_conv2d_bn" - top: "conv_1_1_conv2d_relu" -} -layer { - name: "conv_2_2_conv2d" - type: "Convolution" - bottom: "conv_1_1_conv2d_relu" - top: "conv_2_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "conv_2_2_batchnorm" - type: "BatchNorm" - bottom: "conv_2_2_conv2d" - top: "conv_2_2_conv2d_bn" -} -layer { - name: "conv_2_2_relu" - type: "ReLU" - bottom: "conv_2_2_conv2d_bn" - top: "conv_2_2_conv2d_relu" -} -layer { - name: "pool" - type: "Pooling" - bottom: "conv_2_2_conv2d_relu" - top: "pool" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - pad: 0 - } -} -layer { - name: "conv_3_3_conv2d" - type: "Convolution" - bottom: "pool" - top: "conv_3_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - 
convolution_param { - num_output: 80 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "conv_3_3_batchnorm" - type: "BatchNorm" - bottom: "conv_3_3_conv2d" - top: "conv_3_3_conv2d_bn" -} -layer { - name: "conv_3_3_relu" - type: "ReLU" - bottom: "conv_3_3_conv2d_bn" - top: "conv_3_3_conv2d_relu" -} -layer { - name: "conv_4_4_conv2d" - type: "Convolution" - bottom: "conv_3_3_conv2d_relu" - top: "conv_4_4_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "conv_4_4_batchnorm" - type: "BatchNorm" - bottom: "conv_4_4_conv2d" - top: "conv_4_4_conv2d_bn" -} -layer { - name: "conv_4_4_relu" - type: "ReLU" - bottom: "conv_4_4_conv2d_bn" - top: "conv_4_4_conv2d_relu" -} -layer { - name: "pool1" - type: "Pooling" - bottom: "conv_4_4_conv2d_relu" - top: "pool1" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - pad: 0 - } -} -layer { - name: "mixed_conv_conv2d" - type: "Convolution" - bottom: "pool1" - top: "mixed_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_conv_conv2d" - top: "mixed_conv_conv2d_bn" -} -layer { - name: "mixed_conv_relu" - type: "ReLU" - bottom: "mixed_conv_conv2d_bn" - top: "mixed_conv_conv2d_relu" -} -layer { - name: "mixed_tower_conv_conv2d" - type: "Convolution" - bottom: "pool1" - top: "mixed_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 48 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: 
"mixed_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_tower_conv_conv2d" - top: "mixed_tower_conv_conv2d_bn" -} -layer { - name: "mixed_tower_conv_relu" - type: "ReLU" - bottom: "mixed_tower_conv_conv2d_bn" - top: "mixed_tower_conv_conv2d_relu" -} -layer { - name: "mixed_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_tower_conv_conv2d_relu" - top: "mixed_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_tower_conv_1_conv2d" - top: "mixed_tower_conv_1_conv2d_bn" -} -layer { - name: "mixed_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_tower_conv_1_conv2d_bn" - top: "mixed_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_tower_1_conv_conv2d" - type: "Convolution" - bottom: "pool1" - top: "mixed_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_tower_1_conv_conv2d" - top: "mixed_tower_1_conv_conv2d_bn" -} -layer { - name: "mixed_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_tower_1_conv_conv2d_bn" - top: "mixed_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_tower_1_conv_conv2d_relu" - top: "mixed_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_tower_1_conv_1_conv2d" - top: 
"mixed_tower_1_conv_1_conv2d_bn" -} -layer { - name: "mixed_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_tower_1_conv_1_conv2d_bn" - top: "mixed_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_tower_1_conv_1_conv2d_relu" - top: "mixed_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_tower_1_conv_2_conv2d" - top: "mixed_tower_1_conv_2_conv2d_bn" -} -layer { - name: "mixed_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_tower_1_conv_2_conv2d_bn" - top: "mixed_tower_1_conv_2_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_pool" - type: "Pooling" - bottom: "pool1" - top: "AVE_pool_mixed_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_pool" - top: "mixed_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 32 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_tower_2_conv_conv2d" - top: "mixed_tower_2_conv_conv2d_bn" -} -layer { - name: "mixed_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_tower_2_conv_conv2d_bn" - top: "mixed_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_chconcat" - type: "Concat" - bottom: "mixed_conv_conv2d_relu" - bottom: "mixed_tower_conv_1_conv2d_relu" - bottom: "mixed_tower_1_conv_2_conv2d_relu" - bottom: "mixed_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_1_conv_conv2d" - type: "Convolution" 
- bottom: "ch_concat_mixed_chconcat" - top: "mixed_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_conv_conv2d" - top: "mixed_1_conv_conv2d_bn" -} -layer { - name: "mixed_1_conv_relu" - type: "ReLU" - bottom: "mixed_1_conv_conv2d_bn" - top: "mixed_1_conv_conv2d_relu" -} -layer { - name: "mixed_1_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_chconcat" - top: "mixed_1_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 48 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_conv_conv2d" - top: "mixed_1_tower_conv_conv2d_bn" -} -layer { - name: "mixed_1_tower_conv_relu" - type: "ReLU" - bottom: "mixed_1_tower_conv_conv2d_bn" - top: "mixed_1_tower_conv_conv2d_relu" -} -layer { - name: "mixed_1_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_1_tower_conv_conv2d_relu" - top: "mixed_1_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_conv_1_conv2d" - top: "mixed_1_tower_conv_1_conv2d_bn" -} -layer { - name: "mixed_1_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_1_tower_conv_1_conv2d_bn" - top: "mixed_1_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_1_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_chconcat" - top: "mixed_1_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - 
convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_1_conv_conv2d" - top: "mixed_1_tower_1_conv_conv2d_bn" -} -layer { - name: "mixed_1_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_1_tower_1_conv_conv2d_bn" - top: "mixed_1_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_1_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_1_tower_1_conv_conv2d_relu" - top: "mixed_1_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_1_conv_1_conv2d" - top: "mixed_1_tower_1_conv_1_conv2d_bn" -} -layer { - name: "mixed_1_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_1_tower_1_conv_1_conv2d_bn" - top: "mixed_1_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_1_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_1_tower_1_conv_1_conv2d_relu" - top: "mixed_1_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_1_conv_2_conv2d" - top: "mixed_1_tower_1_conv_2_conv2d_bn" -} -layer { - name: "mixed_1_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_1_tower_1_conv_2_conv2d_bn" - top: "mixed_1_tower_1_conv_2_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_1_pool" - type: "Pooling" - bottom: "ch_concat_mixed_chconcat" - top: "AVE_pool_mixed_1_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } 
-} -layer { - name: "mixed_1_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_1_pool" - top: "mixed_1_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_1_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_1_tower_2_conv_conv2d" - top: "mixed_1_tower_2_conv_conv2d_bn" -} -layer { - name: "mixed_1_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_1_tower_2_conv_conv2d_bn" - top: "mixed_1_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_1_chconcat" - type: "Concat" - bottom: "mixed_1_conv_conv2d_relu" - bottom: "mixed_1_tower_conv_1_conv2d_relu" - bottom: "mixed_1_tower_1_conv_2_conv2d_relu" - bottom: "mixed_1_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_1_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_2_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_1_chconcat" - top: "mixed_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_conv_conv2d" - top: "mixed_2_conv_conv2d_bn" -} -layer { - name: "mixed_2_conv_relu" - type: "ReLU" - bottom: "mixed_2_conv_conv2d_bn" - top: "mixed_2_conv_conv2d_relu" -} -layer { - name: "mixed_2_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_1_chconcat" - top: "mixed_2_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 48 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_tower_conv_conv2d" - top: 
"mixed_2_tower_conv_conv2d_bn" -} -layer { - name: "mixed_2_tower_conv_relu" - type: "ReLU" - bottom: "mixed_2_tower_conv_conv2d_bn" - top: "mixed_2_tower_conv_conv2d_relu" -} -layer { - name: "mixed_2_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_2_tower_conv_conv2d_relu" - top: "mixed_2_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 2 - kernel_size: 5 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_tower_conv_1_conv2d" - top: "mixed_2_tower_conv_1_conv2d_bn" -} -layer { - name: "mixed_2_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_2_tower_conv_1_conv2d_bn" - top: "mixed_2_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_2_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_1_chconcat" - top: "mixed_2_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_tower_1_conv_conv2d" - top: "mixed_2_tower_1_conv_conv2d_bn" -} -layer { - name: "mixed_2_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_2_tower_1_conv_conv2d_bn" - top: "mixed_2_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_2_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_2_tower_1_conv_conv2d_relu" - top: "mixed_2_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_tower_1_conv_1_conv2d" - top: "mixed_2_tower_1_conv_1_conv2d_bn" -} 
-layer { - name: "mixed_2_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_2_tower_1_conv_1_conv2d_bn" - top: "mixed_2_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_2_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_2_tower_1_conv_1_conv2d_relu" - top: "mixed_2_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_tower_1_conv_2_conv2d" - top: "mixed_2_tower_1_conv_2_conv2d_bn" -} -layer { - name: "mixed_2_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_2_tower_1_conv_2_conv2d_bn" - top: "mixed_2_tower_1_conv_2_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_2_pool" - type: "Pooling" - bottom: "ch_concat_mixed_1_chconcat" - top: "AVE_pool_mixed_2_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_2_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_2_pool" - top: "mixed_2_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_2_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_2_tower_2_conv_conv2d" - top: "mixed_2_tower_2_conv_conv2d_bn" -} -layer { - name: "mixed_2_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_2_tower_2_conv_conv2d_bn" - top: "mixed_2_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_2_chconcat" - type: "Concat" - bottom: "mixed_2_conv_conv2d_relu" - bottom: "mixed_2_tower_conv_1_conv2d_relu" - bottom: "mixed_2_tower_1_conv_2_conv2d_relu" - bottom: "mixed_2_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_2_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: 
"mixed_3_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_2_chconcat" - top: "mixed_3_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 2 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_3_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_3_conv_conv2d" - top: "mixed_3_conv_conv2d_bn" -} -layer { - name: "mixed_3_conv_relu" - type: "ReLU" - bottom: "mixed_3_conv_conv2d_bn" - top: "mixed_3_conv_conv2d_relu" -} -layer { - name: "mixed_3_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_2_chconcat" - top: "mixed_3_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 64 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_3_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_3_tower_conv_conv2d" - top: "mixed_3_tower_conv_conv2d_bn" -} -layer { - name: "mixed_3_tower_conv_relu" - type: "ReLU" - bottom: "mixed_3_tower_conv_conv2d_bn" - top: "mixed_3_tower_conv_conv2d_relu" -} -layer { - name: "mixed_3_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_3_tower_conv_conv2d_relu" - top: "mixed_3_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_3_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_3_tower_conv_1_conv2d" - top: "mixed_3_tower_conv_1_conv2d_bn" -} -layer { - name: "mixed_3_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_3_tower_conv_1_conv2d_bn" - top: "mixed_3_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_3_tower_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_3_tower_conv_1_conv2d_relu" - top: 
"mixed_3_tower_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 96 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 2 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_3_tower_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_3_tower_conv_2_conv2d" - top: "mixed_3_tower_conv_2_conv2d_bn" -} -layer { - name: "mixed_3_tower_conv_2_relu" - type: "ReLU" - bottom: "mixed_3_tower_conv_2_conv2d_bn" - top: "mixed_3_tower_conv_2_conv2d_relu" -} -layer { - name: "max_pool_mixed_3_pool" - type: "Pooling" - bottom: "ch_concat_mixed_2_chconcat" - top: "max_pool_mixed_3_pool" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - pad: 0 - } -} -layer { - name: "ch_concat_mixed_3_chconcat" - type: "Concat" - bottom: "max_pool_mixed_3_pool" - bottom: "mixed_3_conv_conv2d_relu" - bottom: "mixed_3_tower_conv_2_conv2d_relu" - top: "ch_concat_mixed_3_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_4_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_3_chconcat" - top: "mixed_4_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_4_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_conv_conv2d" - top: "mixed_4_conv_conv2d_bn" -} -layer { - name: "mixed_4_conv_relu" - type: "ReLU" - bottom: "mixed_4_conv_conv2d_bn" - top: "mixed_4_conv_conv2d_relu" -} -layer { - name: "mixed_4_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_3_chconcat" - top: "mixed_4_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_4_tower_conv_batchnorm" - type: "BatchNorm" - bottom: 
"mixed_4_tower_conv_conv2d" - top: "mixed_4_tower_conv_conv2d_bn" -} -layer { - name: "mixed_4_tower_conv_relu" - type: "ReLU" - bottom: "mixed_4_tower_conv_conv2d_bn" - top: "mixed_4_tower_conv_conv2d_relu" -} -layer { - name: "mixed_4_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_4_tower_conv_conv2d_relu" - top: "mixed_4_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_4_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_conv_1_conv2d" - top: "mixed_4_tower_conv_1_conv2d_bn" -} -layer { - name: "mixed_4_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_4_tower_conv_1_conv2d_bn" - top: "mixed_4_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_4_tower_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_4_tower_conv_1_conv2d_relu" - top: "mixed_4_tower_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_4_tower_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_conv_2_conv2d" - top: "mixed_4_tower_conv_2_conv2d_bn" -} -layer { - name: "mixed_4_tower_conv_2_relu" - type: "ReLU" - bottom: "mixed_4_tower_conv_2_conv2d_bn" - top: "mixed_4_tower_conv_2_conv2d_relu" -} -layer { - name: "mixed_4_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_3_chconcat" - top: "mixed_4_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_4_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: 
"mixed_4_tower_1_conv_conv2d" - top: "mixed_4_tower_1_conv_conv2d_bn" -} -layer { - name: "mixed_4_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_4_tower_1_conv_conv2d_bn" - top: "mixed_4_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_4_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_4_tower_1_conv_conv2d_relu" - top: "mixed_4_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_4_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_1_conv_1_conv2d" - top: "mixed_4_tower_1_conv_1_conv2d_bn" -} -layer { - name: "mixed_4_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_4_tower_1_conv_1_conv2d_bn" - top: "mixed_4_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_4_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_4_tower_1_conv_1_conv2d_relu" - top: "mixed_4_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_4_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_1_conv_2_conv2d" - top: "mixed_4_tower_1_conv_2_conv2d_bn" -} -layer { - name: "mixed_4_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_4_tower_1_conv_2_conv2d_bn" - top: "mixed_4_tower_1_conv_2_conv2d_relu" -} -layer { - name: "mixed_4_tower_1_conv_3_conv2d" - type: "Convolution" - bottom: "mixed_4_tower_1_conv_2_conv2d_relu" - top: "mixed_4_tower_1_conv_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 128 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} 
-layer { - name: "mixed_4_tower_1_conv_3_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_1_conv_3_conv2d" - top: "mixed_4_tower_1_conv_3_conv2d_bn" -} -layer { - name: "mixed_4_tower_1_conv_3_relu" - type: "ReLU" - bottom: "mixed_4_tower_1_conv_3_conv2d_bn" - top: "mixed_4_tower_1_conv_3_conv2d_relu" -} -layer { - name: "mixed_4_tower_1_conv_4_conv2d" - type: "Convolution" - bottom: "mixed_4_tower_1_conv_3_conv2d_relu" - top: "mixed_4_tower_1_conv_4_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_4_tower_1_conv_4_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_1_conv_4_conv2d" - top: "mixed_4_tower_1_conv_4_conv2d_bn" -} -layer { - name: "mixed_4_tower_1_conv_4_relu" - type: "ReLU" - bottom: "mixed_4_tower_1_conv_4_conv2d_bn" - top: "mixed_4_tower_1_conv_4_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_4_pool" - type: "Pooling" - bottom: "ch_concat_mixed_3_chconcat" - top: "AVE_pool_mixed_4_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_4_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_4_pool" - top: "mixed_4_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_4_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_4_tower_2_conv_conv2d" - top: "mixed_4_tower_2_conv_conv2d_bn" -} -layer { - name: "mixed_4_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_4_tower_2_conv_conv2d_bn" - top: "mixed_4_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_4_chconcat" - type: "Concat" - bottom: "mixed_4_conv_conv2d_relu" - bottom: 
"mixed_4_tower_conv_2_conv2d_relu" - bottom: "mixed_4_tower_1_conv_4_conv2d_relu" - bottom: "mixed_4_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_4_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_5_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_4_chconcat" - top: "mixed_5_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_5_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_conv_conv2d" - top: "mixed_5_conv_conv2d_bn" -} -layer { - name: "mixed_5_conv_relu" - type: "ReLU" - bottom: "mixed_5_conv_conv2d_bn" - top: "mixed_5_conv_conv2d_relu" -} -layer { - name: "mixed_5_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_4_chconcat" - top: "mixed_5_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_5_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_conv_conv2d" - top: "mixed_5_tower_conv_conv2d_bn" -} -layer { - name: "mixed_5_tower_conv_relu" - type: "ReLU" - bottom: "mixed_5_tower_conv_conv2d_bn" - top: "mixed_5_tower_conv_conv2d_relu" -} -layer { - name: "mixed_5_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_5_tower_conv_conv2d_relu" - top: "mixed_5_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_5_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_conv_1_conv2d" - top: "mixed_5_tower_conv_1_conv2d_bn" -} -layer { - name: "mixed_5_tower_conv_1_relu" - type: 
"ReLU" - bottom: "mixed_5_tower_conv_1_conv2d_bn" - top: "mixed_5_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_5_tower_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_5_tower_conv_1_conv2d_relu" - top: "mixed_5_tower_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_5_tower_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_conv_2_conv2d" - top: "mixed_5_tower_conv_2_conv2d_bn" -} -layer { - name: "mixed_5_tower_conv_2_relu" - type: "ReLU" - bottom: "mixed_5_tower_conv_2_conv2d_bn" - top: "mixed_5_tower_conv_2_conv2d_relu" -} -layer { - name: "mixed_5_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_4_chconcat" - top: "mixed_5_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_5_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_1_conv_conv2d" - top: "mixed_5_tower_1_conv_conv2d_bn" -} -layer { - name: "mixed_5_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_5_tower_1_conv_conv2d_bn" - top: "mixed_5_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_5_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_5_tower_1_conv_conv2d_relu" - top: "mixed_5_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_5_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_1_conv_1_conv2d" - top: "mixed_5_tower_1_conv_1_conv2d_bn" -} -layer { - name: 
"mixed_5_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_5_tower_1_conv_1_conv2d_bn" - top: "mixed_5_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_5_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_5_tower_1_conv_1_conv2d_relu" - top: "mixed_5_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_5_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_1_conv_2_conv2d" - top: "mixed_5_tower_1_conv_2_conv2d_bn" -} -layer { - name: "mixed_5_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_5_tower_1_conv_2_conv2d_bn" - top: "mixed_5_tower_1_conv_2_conv2d_relu" -} -layer { - name: "mixed_5_tower_1_conv_3_conv2d" - type: "Convolution" - bottom: "mixed_5_tower_1_conv_2_conv2d_relu" - top: "mixed_5_tower_1_conv_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_5_tower_1_conv_3_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_1_conv_3_conv2d" - top: "mixed_5_tower_1_conv_3_conv2d_bn" -} -layer { - name: "mixed_5_tower_1_conv_3_relu" - type: "ReLU" - bottom: "mixed_5_tower_1_conv_3_conv2d_bn" - top: "mixed_5_tower_1_conv_3_conv2d_relu" -} -layer { - name: "mixed_5_tower_1_conv_4_conv2d" - type: "Convolution" - bottom: "mixed_5_tower_1_conv_3_conv2d_relu" - top: "mixed_5_tower_1_conv_4_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_5_tower_1_conv_4_batchnorm" - type: "BatchNorm" - bottom: 
"mixed_5_tower_1_conv_4_conv2d" - top: "mixed_5_tower_1_conv_4_conv2d_bn" -} -layer { - name: "mixed_5_tower_1_conv_4_relu" - type: "ReLU" - bottom: "mixed_5_tower_1_conv_4_conv2d_bn" - top: "mixed_5_tower_1_conv_4_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_5_pool" - type: "Pooling" - bottom: "ch_concat_mixed_4_chconcat" - top: "AVE_pool_mixed_5_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_5_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_5_pool" - top: "mixed_5_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_5_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_5_tower_2_conv_conv2d" - top: "mixed_5_tower_2_conv_conv2d_bn" -} -layer { - name: "mixed_5_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_5_tower_2_conv_conv2d_bn" - top: "mixed_5_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_5_chconcat" - type: "Concat" - bottom: "mixed_5_conv_conv2d_relu" - bottom: "mixed_5_tower_conv_2_conv2d_relu" - bottom: "mixed_5_tower_1_conv_4_conv2d_relu" - bottom: "mixed_5_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_5_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_6_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_5_chconcat" - top: "mixed_6_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_6_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_conv_conv2d" - top: "mixed_6_conv_conv2d_bn" -} -layer { - name: "mixed_6_conv_relu" - type: "ReLU" - bottom: "mixed_6_conv_conv2d_bn" - top: "mixed_6_conv_conv2d_relu" -} -layer { - name: 
"mixed_6_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_5_chconcat" - top: "mixed_6_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_6_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_conv_conv2d" - top: "mixed_6_tower_conv_conv2d_bn" -} -layer { - name: "mixed_6_tower_conv_relu" - type: "ReLU" - bottom: "mixed_6_tower_conv_conv2d_bn" - top: "mixed_6_tower_conv_conv2d_relu" -} -layer { - name: "mixed_6_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_conv_conv2d_relu" - top: "mixed_6_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_6_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_conv_1_conv2d" - top: "mixed_6_tower_conv_1_conv2d_bn" -} -layer { - name: "mixed_6_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_6_tower_conv_1_conv2d_bn" - top: "mixed_6_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_6_tower_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_conv_1_conv2d_relu" - top: "mixed_6_tower_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_6_tower_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_conv_2_conv2d" - top: "mixed_6_tower_conv_2_conv2d_bn" -} -layer { - name: "mixed_6_tower_conv_2_relu" - type: "ReLU" - bottom: "mixed_6_tower_conv_2_conv2d_bn" - top: "mixed_6_tower_conv_2_conv2d_relu" -} -layer { - name: 
"mixed_6_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_5_chconcat" - top: "mixed_6_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_6_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_1_conv_conv2d" - top: "mixed_6_tower_1_conv_conv2d_bn" -} -layer { - name: "mixed_6_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_6_tower_1_conv_conv2d_bn" - top: "mixed_6_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_6_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_1_conv_conv2d_relu" - top: "mixed_6_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_6_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_1_conv_1_conv2d" - top: "mixed_6_tower_1_conv_1_conv2d_bn" -} -layer { - name: "mixed_6_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_6_tower_1_conv_1_conv2d_bn" - top: "mixed_6_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_6_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_1_conv_1_conv2d_relu" - top: "mixed_6_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_6_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_1_conv_2_conv2d" - top: "mixed_6_tower_1_conv_2_conv2d_bn" -} -layer { - name: "mixed_6_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_6_tower_1_conv_2_conv2d_bn" - top: 
"mixed_6_tower_1_conv_2_conv2d_relu" -} -layer { - name: "mixed_6_tower_1_conv_3_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_1_conv_2_conv2d_relu" - top: "mixed_6_tower_1_conv_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 160 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_6_tower_1_conv_3_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_1_conv_3_conv2d" - top: "mixed_6_tower_1_conv_3_conv2d_bn" -} -layer { - name: "mixed_6_tower_1_conv_3_relu" - type: "ReLU" - bottom: "mixed_6_tower_1_conv_3_conv2d_bn" - top: "mixed_6_tower_1_conv_3_conv2d_relu" -} -layer { - name: "mixed_6_tower_1_conv_4_conv2d" - type: "Convolution" - bottom: "mixed_6_tower_1_conv_3_conv2d_relu" - top: "mixed_6_tower_1_conv_4_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_6_tower_1_conv_4_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_1_conv_4_conv2d" - top: "mixed_6_tower_1_conv_4_conv2d_bn" -} -layer { - name: "mixed_6_tower_1_conv_4_relu" - type: "ReLU" - bottom: "mixed_6_tower_1_conv_4_conv2d_bn" - top: "mixed_6_tower_1_conv_4_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_6_pool" - type: "Pooling" - bottom: "ch_concat_mixed_5_chconcat" - top: "AVE_pool_mixed_6_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_6_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_6_pool" - top: "mixed_6_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - 
name: "mixed_6_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_6_tower_2_conv_conv2d" - top: "mixed_6_tower_2_conv_conv2d_bn" -} -layer { - name: "mixed_6_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_6_tower_2_conv_conv2d_bn" - top: "mixed_6_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_6_chconcat" - type: "Concat" - bottom: "mixed_6_conv_conv2d_relu" - bottom: "mixed_6_tower_conv_2_conv2d_relu" - bottom: "mixed_6_tower_1_conv_4_conv2d_relu" - bottom: "mixed_6_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_6_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_7_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_6_chconcat" - top: "mixed_7_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_7_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_conv_conv2d" - top: "mixed_7_conv_conv2d_bn" -} -layer { - name: "mixed_7_conv_relu" - type: "ReLU" - bottom: "mixed_7_conv_conv2d_bn" - top: "mixed_7_conv_conv2d_relu" -} -layer { - name: "mixed_7_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_6_chconcat" - top: "mixed_7_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_7_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_conv_conv2d" - top: "mixed_7_tower_conv_conv2d_bn" -} -layer { - name: "mixed_7_tower_conv_relu" - type: "ReLU" - bottom: "mixed_7_tower_conv_conv2d_bn" - top: "mixed_7_tower_conv_conv2d_relu" -} -layer { - name: "mixed_7_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_7_tower_conv_conv2d_relu" - top: "mixed_7_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - 
decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_7_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_conv_1_conv2d" - top: "mixed_7_tower_conv_1_conv2d_bn" -} -layer { - name: "mixed_7_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_7_tower_conv_1_conv2d_bn" - top: "mixed_7_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_7_tower_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_7_tower_conv_1_conv2d_relu" - top: "mixed_7_tower_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_7_tower_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_conv_2_conv2d" - top: "mixed_7_tower_conv_2_conv2d_bn" -} -layer { - name: "mixed_7_tower_conv_2_relu" - type: "ReLU" - bottom: "mixed_7_tower_conv_2_conv2d_bn" - top: "mixed_7_tower_conv_2_conv2d_relu" -} -layer { - name: "mixed_7_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_6_chconcat" - top: "mixed_7_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_7_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_1_conv_conv2d" - top: "mixed_7_tower_1_conv_conv2d_bn" -} -layer { - name: "mixed_7_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_7_tower_1_conv_conv2d_bn" - top: "mixed_7_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_7_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_7_tower_1_conv_conv2d_relu" - top: "mixed_7_tower_1_conv_1_conv2d" - param { - 
lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_7_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_1_conv_1_conv2d" - top: "mixed_7_tower_1_conv_1_conv2d_bn" -} -layer { - name: "mixed_7_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_7_tower_1_conv_1_conv2d_bn" - top: "mixed_7_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_7_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_7_tower_1_conv_1_conv2d_relu" - top: "mixed_7_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_7_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_1_conv_2_conv2d" - top: "mixed_7_tower_1_conv_2_conv2d_bn" -} -layer { - name: "mixed_7_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_7_tower_1_conv_2_conv2d_bn" - top: "mixed_7_tower_1_conv_2_conv2d_relu" -} -layer { - name: "mixed_7_tower_1_conv_3_conv2d" - type: "Convolution" - bottom: "mixed_7_tower_1_conv_2_conv2d_relu" - top: "mixed_7_tower_1_conv_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_7_tower_1_conv_3_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_1_conv_3_conv2d" - top: "mixed_7_tower_1_conv_3_conv2d_bn" -} -layer { - name: "mixed_7_tower_1_conv_3_relu" - type: "ReLU" - bottom: "mixed_7_tower_1_conv_3_conv2d_bn" - top: "mixed_7_tower_1_conv_3_conv2d_relu" -} -layer { - name: "mixed_7_tower_1_conv_4_conv2d" - type: "Convolution" - 
bottom: "mixed_7_tower_1_conv_3_conv2d_relu" - top: "mixed_7_tower_1_conv_4_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_7_tower_1_conv_4_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_1_conv_4_conv2d" - top: "mixed_7_tower_1_conv_4_conv2d_bn" -} -layer { - name: "mixed_7_tower_1_conv_4_relu" - type: "ReLU" - bottom: "mixed_7_tower_1_conv_4_conv2d_bn" - top: "mixed_7_tower_1_conv_4_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_7_pool" - type: "Pooling" - bottom: "ch_concat_mixed_6_chconcat" - top: "AVE_pool_mixed_7_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_7_tower_2_conv_conv2d" - type: "Convolution" - bottom: "AVE_pool_mixed_7_pool" - top: "mixed_7_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_7_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_7_tower_2_conv_conv2d" - top: "mixed_7_tower_2_conv_conv2d_bn" -} -layer { - name: "mixed_7_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_7_tower_2_conv_conv2d_bn" - top: "mixed_7_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_7_chconcat" - type: "Concat" - bottom: "mixed_7_conv_conv2d_relu" - bottom: "mixed_7_tower_conv_2_conv2d_relu" - bottom: "mixed_7_tower_1_conv_4_conv2d_relu" - bottom: "mixed_7_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_7_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_8_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_7_chconcat" - top: "mixed_8_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - 
num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_8_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_conv_conv2d" - top: "mixed_8_tower_conv_conv2d_bn" -} -layer { - name: "mixed_8_tower_conv_relu" - type: "ReLU" - bottom: "mixed_8_tower_conv_conv2d_bn" - top: "mixed_8_tower_conv_conv2d_relu" -} -layer { - name: "mixed_8_tower_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_8_tower_conv_conv2d_relu" - top: "mixed_8_tower_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 320 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 2 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_8_tower_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_conv_1_conv2d" - top: "mixed_8_tower_conv_1_conv2d_bn" -} -layer { - name: "mixed_8_tower_conv_1_relu" - type: "ReLU" - bottom: "mixed_8_tower_conv_1_conv2d_bn" - top: "mixed_8_tower_conv_1_conv2d_relu" -} -layer { - name: "mixed_8_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_7_chconcat" - top: "mixed_8_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_8_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_1_conv_conv2d" - top: "mixed_8_tower_1_conv_conv2d_bn" -} -layer { - name: "mixed_8_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_8_tower_1_conv_conv2d_bn" - top: "mixed_8_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_8_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_8_tower_1_conv_conv2d_relu" - top: "mixed_8_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 
- weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 3 - kernel_h: 1 - kernel_w: 7 - } -} -layer { - name: "mixed_8_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_1_conv_1_conv2d" - top: "mixed_8_tower_1_conv_1_conv2d_bn" -} -layer { - name: "mixed_8_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_8_tower_1_conv_1_conv2d_bn" - top: "mixed_8_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_8_tower_1_conv_2_conv2d" - type: "Convolution" - bottom: "mixed_8_tower_1_conv_1_conv2d_relu" - top: "mixed_8_tower_1_conv_2_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 3 - pad_w: 0 - kernel_h: 7 - kernel_w: 1 - } -} -layer { - name: "mixed_8_tower_1_conv_2_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_1_conv_2_conv2d" - top: "mixed_8_tower_1_conv_2_conv2d_bn" -} -layer { - name: "mixed_8_tower_1_conv_2_relu" - type: "ReLU" - bottom: "mixed_8_tower_1_conv_2_conv2d_bn" - top: "mixed_8_tower_1_conv_2_conv2d_relu" -} -layer { - name: "mixed_8_tower_1_conv_3_conv2d" - type: "Convolution" - bottom: "mixed_8_tower_1_conv_2_conv2d_relu" - top: "mixed_8_tower_1_conv_3_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 3 - stride: 2 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_8_tower_1_conv_3_batchnorm" - type: "BatchNorm" - bottom: "mixed_8_tower_1_conv_3_conv2d" - top: "mixed_8_tower_1_conv_3_conv2d_bn" -} -layer { - name: "mixed_8_tower_1_conv_3_relu" - type: "ReLU" - bottom: "mixed_8_tower_1_conv_3_conv2d_bn" - top: "mixed_8_tower_1_conv_3_conv2d_relu" -} -layer { - name: "MAX_pool_mixed_8_pool" - type: "Pooling" - bottom: "ch_concat_mixed_7_chconcat" - top: "MAX_pool_mixed_8_pool" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - pad: 0 - } -} 
-layer { - name: "ch_concat_mixed_8_chconcat" - type: "Concat" - bottom: "mixed_8_tower_conv_1_conv2d_relu" - bottom: "mixed_8_tower_1_conv_3_conv2d_relu" - bottom: "MAX_pool_mixed_8_pool" - top: "ch_concat_mixed_8_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_9_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_8_chconcat" - top: "mixed_9_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 320 - bias_term: false - pad: 0 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - kernel_h: 1 - kernel_w: 1 - } -} -layer { - name: "mixed_9_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_conv_conv2d" - top: "mixed_9_conv_conv2d_bn" -} -layer { - name: "mixed_9_conv_relu" - type: "ReLU" - bottom: "mixed_9_conv_conv2d_bn" - top: "mixed_9_conv_conv2d_relu" -} -layer { - name: "mixed_9_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_8_chconcat" - top: "mixed_9_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_9_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_conv_conv2d" - top: "mixed_9_tower_conv_conv2d_bn" -} -layer { - name: "mixed_9_tower_conv_relu" - type: "ReLU" - bottom: "mixed_9_tower_conv_conv2d_bn" - top: "mixed_9_tower_conv_conv2d_relu" -} -layer { - name: "mixed_9_tower_mixed_conv_conv2d" - type: "Convolution" - bottom: "mixed_9_tower_conv_conv2d_relu" - top: "mixed_9_tower_mixed_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 1 - kernel_h: 1 - kernel_w: 3 - } -} -layer { - name: "mixed_9_tower_mixed_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_mixed_conv_conv2d" - top: 
"mixed_9_tower_mixed_conv_conv2d_bn" -} -layer { - name: "mixed_9_tower_mixed_conv_relu" - type: "ReLU" - bottom: "mixed_9_tower_mixed_conv_conv2d_bn" - top: "mixed_9_tower_mixed_conv_conv2d_relu" -} -layer { - name: "mixed_9_tower_mixed_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_9_tower_conv_conv2d_relu" - top: "mixed_9_tower_mixed_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 1 - pad_w: 0 - kernel_h: 3 - kernel_w: 1 - } -} -layer { - name: "mixed_9_tower_mixed_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_mixed_conv_1_conv2d" - top: "mixed_9_tower_mixed_conv_1_conv2d_bn" -} -layer { - name: "mixed_9_tower_mixed_conv_1_relu" - type: "ReLU" - bottom: "mixed_9_tower_mixed_conv_1_conv2d_bn" - top: "mixed_9_tower_mixed_conv_1_conv2d_relu" -} -layer { - name: "mixed_9_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_8_chconcat" - top: "mixed_9_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 448 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_9_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_1_conv_conv2d" - top: "mixed_9_tower_1_conv_conv2d_bn" -} -layer { - name: "mixed_9_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_9_tower_1_conv_conv2d_bn" - top: "mixed_9_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_9_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_9_tower_1_conv_conv2d_relu" - top: "mixed_9_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_9_tower_1_conv_1_batchnorm" - type: 
"BatchNorm" - bottom: "mixed_9_tower_1_conv_1_conv2d" - top: "mixed_9_tower_1_conv_1_conv2d_bn" -} -layer { - name: "mixed_9_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_9_tower_1_conv_1_conv2d_bn" - top: "mixed_9_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_9_tower_1_mixed_conv_conv2d" - type: "Convolution" - bottom: "mixed_9_tower_1_conv_1_conv2d_relu" - top: "mixed_9_tower_1_mixed_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 1 - kernel_h: 1 - kernel_w: 3 - } -} -layer { - name: "mixed_9_tower_1_mixed_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_1_mixed_conv_conv2d" - top: "mixed_9_tower_1_mixed_conv_conv2d_bn" -} -layer { - name: "mixed_9_tower_1_mixed_conv_relu" - type: "ReLU" - bottom: "mixed_9_tower_1_mixed_conv_conv2d_bn" - top: "mixed_9_tower_1_mixed_conv_conv2d_relu" -} -layer { - name: "mixed_9_tower_1_mixed_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_9_tower_1_conv_1_conv2d_relu" - top: "mixed_9_tower_1_mixed_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 1 - pad_w: 0 - kernel_h: 3 - kernel_w: 1 - } -} -layer { - name: "mixed_9_tower_1_mixed_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_1_mixed_conv_1_conv2d" - top: "mixed_9_tower_1_mixed_conv_1_conv2d_bn" -} -layer { - name: "mixed_9_tower_1_mixed_conv_1_relu" - type: "ReLU" - bottom: "mixed_9_tower_1_mixed_conv_1_conv2d_bn" - top: "mixed_9_tower_1_mixed_conv_1_conv2d_relu" -} -layer { - name: "AVE_pool_mixed_9_pool" - type: "Pooling" - bottom: "ch_concat_mixed_8_chconcat" - top: "AVE_pool_mixed_9_pool" - pooling_param { - pool: AVE - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_9_tower_2_conv_conv2d" - type: 
"Convolution" - bottom: "AVE_pool_mixed_9_pool" - top: "mixed_9_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_9_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_9_tower_2_conv_conv2d" - top: "mixed_9_tower_2_conv_conv2d_bn" -} -layer { - name: "mixed_9_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_9_tower_2_conv_conv2d_bn" - top: "mixed_9_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_9_chconcat" - type: "Concat" - bottom: "mixed_9_conv_conv2d_relu" - bottom: "mixed_9_tower_mixed_conv_conv2d_relu" - bottom: "mixed_9_tower_mixed_conv_1_conv2d_relu" - bottom: "mixed_9_tower_1_mixed_conv_conv2d_relu" - bottom: "mixed_9_tower_1_mixed_conv_1_conv2d_relu" - bottom: "mixed_9_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_9_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "mixed_10_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_9_chconcat" - top: "mixed_10_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 320 - bias_term: false - pad: 0 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - kernel_h: 1 - kernel_w: 1 - } -} -layer { - name: "mixed_10_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_conv_conv2d" - top: "mixed_10_conv_conv2d_bn" -} -layer { - name: "mixed_10_conv_relu" - type: "ReLU" - bottom: "mixed_10_conv_conv2d_bn" - top: "mixed_10_conv_conv2d_relu" -} -layer { - name: "mixed_10_tower_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_9_chconcat" - top: "mixed_10_tower_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: 
"mixed_10_tower_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_conv_conv2d" - top: "mixed_10_tower_conv_conv2d_bn" -} -layer { - name: "mixed_10_tower_conv_relu" - type: "ReLU" - bottom: "mixed_10_tower_conv_conv2d_bn" - top: "mixed_10_tower_conv_conv2d_relu" -} -layer { - name: "mixed_10_tower_mixed_conv_conv2d" - type: "Convolution" - bottom: "mixed_10_tower_conv_conv2d_relu" - top: "mixed_10_tower_mixed_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 1 - kernel_h: 1 - kernel_w: 3 - } -} -layer { - name: "mixed_10_tower_mixed_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_mixed_conv_conv2d" - top: "mixed_10_tower_mixed_conv_conv2d_bn" -} -layer { - name: "mixed_10_tower_mixed_conv_relu" - type: "ReLU" - bottom: "mixed_10_tower_mixed_conv_conv2d_bn" - top: "mixed_10_tower_mixed_conv_conv2d_relu" -} -layer { - name: "mixed_10_tower_mixed_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_10_tower_conv_conv2d_relu" - top: "mixed_10_tower_mixed_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 1 - pad_w: 0 - kernel_h: 3 - kernel_w: 1 - } -} -layer { - name: "mixed_10_tower_mixed_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_mixed_conv_1_conv2d" - top: "mixed_10_tower_mixed_conv_1_conv2d_bn" -} -layer { - name: "mixed_10_tower_mixed_conv_1_relu" - type: "ReLU" - bottom: "mixed_10_tower_mixed_conv_1_conv2d_bn" - top: "mixed_10_tower_mixed_conv_1_conv2d_relu" -} -layer { - name: "mixed_10_tower_1_conv_conv2d" - type: "Convolution" - bottom: "ch_concat_mixed_9_chconcat" - top: "mixed_10_tower_1_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 448 - bias_term: false - pad: 0 - 
kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_10_tower_1_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_1_conv_conv2d" - top: "mixed_10_tower_1_conv_conv2d_bn" -} -layer { - name: "mixed_10_tower_1_conv_relu" - type: "ReLU" - bottom: "mixed_10_tower_1_conv_conv2d_bn" - top: "mixed_10_tower_1_conv_conv2d_relu" -} -layer { - name: "mixed_10_tower_1_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_10_tower_1_conv_conv2d_relu" - top: "mixed_10_tower_1_conv_1_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - pad: 1 - kernel_size: 3 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_10_tower_1_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_1_conv_1_conv2d" - top: "mixed_10_tower_1_conv_1_conv2d_bn" -} -layer { - name: "mixed_10_tower_1_conv_1_relu" - type: "ReLU" - bottom: "mixed_10_tower_1_conv_1_conv2d_bn" - top: "mixed_10_tower_1_conv_1_conv2d_relu" -} -layer { - name: "mixed_10_tower_1_mixed_conv_conv2d" - type: "Convolution" - bottom: "mixed_10_tower_1_conv_1_conv2d_relu" - top: "mixed_10_tower_1_mixed_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 0 - pad_w: 1 - kernel_h: 1 - kernel_w: 3 - } -} -layer { - name: "mixed_10_tower_1_mixed_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_1_mixed_conv_conv2d" - top: "mixed_10_tower_1_mixed_conv_conv2d_bn" -} -layer { - name: "mixed_10_tower_1_mixed_conv_relu" - type: "ReLU" - bottom: "mixed_10_tower_1_mixed_conv_conv2d_bn" - top: "mixed_10_tower_1_mixed_conv_conv2d_relu" -} -layer { - name: "mixed_10_tower_1_mixed_conv_1_conv2d" - type: "Convolution" - bottom: "mixed_10_tower_1_conv_1_conv2d_relu" - top: "mixed_10_tower_1_mixed_conv_1_conv2d" - param { - 
lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 384 - bias_term: false - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - pad_h: 1 - pad_w: 0 - kernel_h: 3 - kernel_w: 1 - } -} -layer { - name: "mixed_10_tower_1_mixed_conv_1_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_1_mixed_conv_1_conv2d" - top: "mixed_10_tower_1_mixed_conv_1_conv2d_bn" -} -layer { - name: "mixed_10_tower_1_mixed_conv_1_relu" - type: "ReLU" - bottom: "mixed_10_tower_1_mixed_conv_1_conv2d_bn" - top: "mixed_10_tower_1_mixed_conv_1_conv2d_relu" -} -layer { - name: "MAX_pool_mixed_10_pool" - type: "Pooling" - bottom: "ch_concat_mixed_9_chconcat" - top: "MAX_pool_mixed_10_pool" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 1 - pad: 1 - } -} -layer { - name: "mixed_10_tower_2_conv_conv2d" - type: "Convolution" - bottom: "MAX_pool_mixed_10_pool" - top: "mixed_10_tower_2_conv_conv2d" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - convolution_param { - num_output: 192 - bias_term: false - pad: 0 - kernel_size: 1 - stride: 1 - weight_filler { - type: "gaussian" - std: 0.01 - } - } -} -layer { - name: "mixed_10_tower_2_conv_batchnorm" - type: "BatchNorm" - bottom: "mixed_10_tower_2_conv_conv2d" - top: "mixed_10_tower_2_conv_conv2d_bn" -} -layer { - name: "mixed_10_tower_2_conv_relu" - type: "ReLU" - bottom: "mixed_10_tower_2_conv_conv2d_bn" - top: "mixed_10_tower_2_conv_conv2d_relu" -} -layer { - name: "ch_concat_mixed_10_chconcat" - type: "Concat" - bottom: "mixed_10_conv_conv2d_relu" - bottom: "mixed_10_tower_mixed_conv_conv2d_relu" - bottom: "mixed_10_tower_mixed_conv_1_conv2d_relu" - bottom: "mixed_10_tower_1_mixed_conv_conv2d_relu" - bottom: "mixed_10_tower_1_mixed_conv_1_conv2d_relu" - bottom: "mixed_10_tower_2_conv_conv2d_relu" - top: "ch_concat_mixed_10_chconcat" - concat_param { - axis: 1 - } -} -layer { - name: "global_pool" - type: "Pooling" - bottom: "ch_concat_mixed_10_chconcat" - top: "global_pool" - pooling_param { - pool: AVE - 
kernel_size: 8 - stride: 1 - pad: 0 - } -} -layer { - name: "drop" - type: "Dropout" - bottom: "global_pool" - top: "global_pool" - dropout_param { - dropout_ratio: 0.8 - } -} -layer { - name: "flatten" - type: "Flatten" - bottom: "global_pool" - top: "flatten" -} -layer { - name: "fc1" - type: "InnerProduct" - bottom: "flatten" - top: "fc1" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - inner_product_param { - num_output: 1000 - weight_filler { - type: "xavier" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "loss" - type: "SoftmaxWithLoss" - bottom: "fc1" - bottom: "label" - top: "loss" -} -layer { - name: "loss3/top-1" - type: "Accuracy" - bottom: "fc1" - bottom: "label" - top: "loss3/top-1" - include { - phase: TEST - } -} -layer { - name: "loss3/top-5" - type: "Accuracy" - bottom: "fc1" - bottom: "label" - top: "loss3/top-5" - include { - phase: TEST - } - accuracy_param { - top_k: 5 - } -} \ No newline at end of file diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 1bc68061c..82b26a9ca 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -530,11 +530,9 @@ void Solver::Solve(const char* resume_file) { LOG(INFO) << "Iteration " << iter_ << ", loss = " << smoothed_loss_; } -#ifdef USE_MLSL // in multinode last test must be done after weights update if (param_.test_interval() && iter_ % param_.test_interval() == 0) TestAll(); -#endif LOG(INFO) << "Optimization Done."; } From 439a1c33dc59e04c6826d2b5b89b0ebd51e0c5c3 Mon Sep 17 00:00:00 2001 From: "Gong, Jiong" Date: Fri, 9 Jun 2017 18:59:08 +0800 Subject: [PATCH 07/34] fix a bug of mkldnn integration: no need to call prv descriptor when the head is already on CPU or synced --- src/caffe/syncedmem.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/caffe/syncedmem.cpp b/src/caffe/syncedmem.cpp index 25e978856..e825640b1 100644 --- a/src/caffe/syncedmem.cpp +++ b/src/caffe/syncedmem.cpp @@ -91,13 
+91,6 @@ inline void SyncedMemory::to_cpu() { break; case SYNCED_PRV: case HEAD_AT_CPU: - if (prv_descriptor_.get()) { - if (prv_descriptor_->on_to_cpu()) - //Fix: head_ = SYNCED means for caffe that CPU and GPU are in sync, - //as we do not have GPU setting, head_ to SYNCED will cause problems. - head_ = SYNCED_PRV; - } - break; case SYNCED: break; } From d5b6220b82e7615fefb9b7c238837de1fd746792 Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Fri, 9 Jun 2017 14:34:45 +0800 Subject: [PATCH 08/34] 1. Merge the fix of a bug of mkldnn integration: no need to call prv descriptor when the head is already on CPU or synced 2. Revert the workround in the dropout layer. 3. Fix the typo in the Debug info. --- src/caffe/layers/dropout_layer.cpp | 9 --------- src/caffe/layers/mkldnn_lrn_layer.cpp | 10 +++++----- src/caffe/layers/mkldnn_pooling_layer.cpp | 4 ++-- 3 files changed, 7 insertions(+), 16 deletions(-) diff --git a/src/caffe/layers/dropout_layer.cpp b/src/caffe/layers/dropout_layer.cpp index a2bf24333..c23c583de 100644 --- a/src/caffe/layers/dropout_layer.cpp +++ b/src/caffe/layers/dropout_layer.cpp @@ -69,15 +69,6 @@ void DropoutLayer::Forward_cpu(const vector*>& bottom, const vector*>& top) { const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = top[0]->mutable_cpu_data(); - // below line designated to set correspondent SyncedMemory->_head to HEAD_AT_CPU - // Fix the issue of "Check failed: this->_cpu_ptr == cpu_ptr (0 vs. 
0x5587dfc87ec0)" (GoogleNet V1) - // The reason is after pooling layer: MKLDNNPoolingLayer::Forward_cpu: pool5/7x7_s1, the top[0]->prv_data() has value - // It will convert to cpu data in the dropout layer, and set the _head to HEAD_AT_CPU after executing top[0]->mutable_cpu_data() - // Howerver, I found top[0]->cpu_data() and top[0]->prv_data() both has value - // So in the inner product layer: loss3/classifier, the data will convert from bottom prv data - // and the reorder will change from this->_reorder_usr2prv to this->_reorder_extprv2prv_pd - // So eventually trigger the assertion. - top[0]->set_prv_data_descriptor(NULL); unsigned int* mask = rand_vec_.mutable_cpu_data(); const int count = bottom[0]->count(); if (this->phase_ == TRAIN) { diff --git a/src/caffe/layers/mkldnn_lrn_layer.cpp b/src/caffe/layers/mkldnn_lrn_layer.cpp index d56854a41..c5eb48d1c 100644 --- a/src/caffe/layers/mkldnn_lrn_layer.cpp +++ b/src/caffe/layers/mkldnn_lrn_layer.cpp @@ -258,7 +258,7 @@ void MKLDNNLRNLayer::InitLRNBwd(const vector*>& top = get_mkldnn_prv_descriptor(top[0]); memory::format bwd_prv_top_diff_mfmt = static_cast(mem_descr->prv_memory_pd()->desc().data.format); #ifdef DEBUG - LOG(INFO) << "MKLDNNReLULayer::InitLRNBwd: memory format of prv top diff is: " << bwd_prv_top_diff_mfmt; + LOG(INFO) << "MKLDNNLRNLayer::InitLRNBwd: memory format of prv top diff is: " << bwd_prv_top_diff_mfmt; #endif top_diff_md.reset(new memory::desc(mem_descr->prv_memory_pd()->desc())); usr_diff_mpd = mem_descr->usr_memory_pd(); @@ -270,12 +270,12 @@ void MKLDNNLRNLayer::InitLRNBwd(const vector*>& top = get_mkldnn_prv_descriptor(bottom[0]); memory::format fwd_prv_bottom_data_mfmt = static_cast(mem_descr->prv_memory_pd()->desc().data.format); #ifdef DEBUG - LOG(INFO) << "MKLDNNReLULayer::InitLRNBwd: memory format of prv bottom data is: " << fwd_prv_bottom_data_mfmt; + LOG(INFO) << "MKLDNNLRNLayer::InitLRNBwd: memory format of prv bottom data is: " << fwd_prv_bottom_data_mfmt; #endif if 
(bwd_prv_top_diff_mfmt != fwd_prv_bottom_data_mfmt) { #ifdef DEBUG - LOG(INFO) << "MKLDNNReLULayer::InitLRNBwd: Reorder the prv top/bottom diff to the format of prv bottom data! (Performance consideration)"; + LOG(INFO) << "MKLDNNLRNLayer::InitLRNBwd: Reorder the prv top/bottom diff to the format of prv bottom data! (Performance consideration)"; #endif top_diff_md.reset(new memory::desc({tz}, mpcsn, fwd_prv_bottom_data_mfmt)); } @@ -289,8 +289,8 @@ void MKLDNNLRNLayer::InitLRNBwd(const vector*>& top = get_mkldnn_prv_descriptor(bottom[0]); memory::format fwd_prv_bottom_data_mfmt = static_cast(mem_descr->prv_memory_pd()->desc().data.format); #ifdef DEBUG - LOG(INFO) << "MKLDNNReLULayer::InitLRNBwd: memory format of prv bottom data is: " << fwd_prv_bottom_data_mfmt; - LOG(INFO) << "MKLDNNReLULayer::InitLRNBwd: Reorder the usr top/bottom diff to the format of prv bottom data! (Performance consideration)"; + LOG(INFO) << "MKLDNNLRNLayer::InitLRNBwd: memory format of prv bottom data is: " << fwd_prv_bottom_data_mfmt; + LOG(INFO) << "MKLDNNLRNLayer::InitLRNBwd: Reorder the usr top/bottom diff to the format of prv bottom data! (Performance consideration)"; #endif bwd_cmfmt = fwd_prv_bottom_data_mfmt; //top[0]->set_prv_diff_descriptor(NULL); diff --git a/src/caffe/layers/mkldnn_pooling_layer.cpp b/src/caffe/layers/mkldnn_pooling_layer.cpp index 5ea0ae968..27fd4723e 100644 --- a/src/caffe/layers/mkldnn_pooling_layer.cpp +++ b/src/caffe/layers/mkldnn_pooling_layer.cpp @@ -371,8 +371,8 @@ void MKLDNNPoolingLayer::InitPoolingBwd(const vector*>& top = get_mkldnn_prv_descriptor(bottom[0]); memory::format fwd_prv_bottom_data_mfmt = static_cast(mem_descr->prv_memory_pd()->desc().data.format); #ifdef DEBUG - LOG(INFO) << "MKLDNNReLULayer::InitPoolingBwd: memory format of prv bottom data is: " << fwd_prv_bottom_data_mfmt; - LOG(INFO) << "MKLDNNReLULayer::InitPoolingBwd: Reorder the top and bottom diff to the format of prv bottom data! 
(Performance consideration)"; + LOG(INFO) << "MKLDNNPoolingLayer::InitPoolingBwd: memory format of prv bottom data is: " << fwd_prv_bottom_data_mfmt; + LOG(INFO) << "MKLDNNPoolingLayer::InitPoolingBwd: Reorder the top and bottom diff to the format of prv bottom data! (Performance consideration)"; #endif bwd_cmfmt = fwd_prv_bottom_data_mfmt; } From 37c12611522b88b4feec2ffaddedc632d67fdfe2 Mon Sep 17 00:00:00 2001 From: "Gong, Jiong" Date: Thu, 15 Jun 2017 02:54:19 +0800 Subject: [PATCH 09/34] enable model parallelism and sync bn statistics and scale/shift --- include/caffe/layer.hpp | 22 ++ include/caffe/layers/batch_norm_layer.hpp | 4 + include/caffe/layers/mkl_layers.hpp | 4 + include/caffe/layers/mkldnn_layers.hpp | 3 + include/caffe/multinode/apply_mn_param.hpp | 63 ++++ include/caffe/multinode/mlsl.hpp | 116 ++++++- .../caffe/multinode/mn_activation_layer.hpp | 103 ++++++ include/caffe/multinode/multi_sync.hpp | 98 ++++-- include/caffe/solver.hpp | 7 + src/caffe/layer.cpp | 58 ++++ src/caffe/layers/base_conv_layer.cpp | 13 - src/caffe/layers/batch_norm_layer.cpp | 2 +- src/caffe/layers/bias_layer.cpp | 9 - src/caffe/layers/inner_product_layer.cpp | 12 - src/caffe/layers/mkl_convolution_layer.cpp | 13 - src/caffe/layers/mkldnn_convolution_layer.cpp | 12 - src/caffe/multinode/apply_mn_param.cpp | 315 ++++++++++++++++++ src/caffe/multinode/mlsl.cpp | 37 +- src/caffe/multinode/mn_activation_layer.cpp | 253 ++++++++++++++ src/caffe/multinode/multi_solver.cpp | 15 +- src/caffe/multinode/multi_sync.cpp | 3 - src/caffe/net.cpp | 138 +++++++- src/caffe/proto/caffe.proto | 38 ++- src/caffe/solver.cpp | 23 +- 24 files changed, 1229 insertions(+), 132 deletions(-) create mode 100644 include/caffe/multinode/apply_mn_param.hpp create mode 100644 include/caffe/multinode/mn_activation_layer.hpp create mode 100644 src/caffe/multinode/apply_mn_param.cpp create mode 100644 src/caffe/multinode/mn_activation_layer.cpp diff --git a/include/caffe/layer.hpp b/include/caffe/layer.hpp 
index 132c869e5..9b535d811 100644 --- a/include/caffe/layer.hpp +++ b/include/caffe/layer.hpp @@ -119,6 +119,16 @@ class Layer { public: MLSL::Operation *layerOp{ nullptr }; + mn::Distribution &GetDistribution(); + +protected: + virtual bool Bypass(const vector*>& bottom, + const vector*>& top); + + virtual void MultinodeSetUp(const vector*>& bottom, + const vector*>& top); + + virtual bool ParamNeedReduce(int param_id) { return true; } #endif /* USE_MLSL */ @@ -163,6 +173,9 @@ class Layer { LayerSetUp(bottom, top); Reshape(bottom, top); SetLossWeights(top); +#ifdef USE_MLSL + MultinodeSetUp(bottom, top); +#endif } /** @@ -546,6 +559,12 @@ inline Dtype Layer::Forward(const vector*>& bottom, Lock(); Dtype loss = 0; Reshape(bottom, top); +#ifdef USE_MLSL + if (Bypass(bottom, top)) { + Unlock(); + return loss; + } +#endif switch (Caffe::mode()) { case Caffe::CPU: Forward_cpu(bottom, top); @@ -582,6 +601,9 @@ template inline void Layer::Backward(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { +#ifdef USE_MLSL + if (Bypass(bottom, top)) return; +#endif switch (Caffe::mode()) { case Caffe::CPU: Backward_cpu(top, propagate_down, bottom); diff --git a/include/caffe/layers/batch_norm_layer.hpp b/include/caffe/layers/batch_norm_layer.hpp index 130f6cbc2..2cea50aaa 100644 --- a/include/caffe/layers/batch_norm_layer.hpp +++ b/include/caffe/layers/batch_norm_layer.hpp @@ -114,6 +114,10 @@ class BatchNormLayer : public Layer { const Dtype* data_to_be_replicated, FuncTy op_func); +#ifdef USE_MLSL + virtual bool ParamNeedReduce(int param_id) { return false; } +#endif + Blob mean_, variance_, temp_, x_norm_; bool use_global_stats_; Dtype moving_average_fraction_; diff --git a/include/caffe/layers/mkl_layers.hpp b/include/caffe/layers/mkl_layers.hpp index 55d98d55b..10ed1cda9 100644 --- a/include/caffe/layers/mkl_layers.hpp +++ b/include/caffe/layers/mkl_layers.hpp @@ -438,6 +438,10 @@ class MKLBatchNormLayer : public Layer { void Init(const vector*>& 
bottom, const vector*>& top); +#ifdef USE_MLSL + virtual bool ParamNeedReduce(int param_id) { return param_id >= 3; } +#endif + Dtype moving_average_fraction_; Dtype eps_; bool use_weight_bias_; diff --git a/include/caffe/layers/mkldnn_layers.hpp b/include/caffe/layers/mkldnn_layers.hpp index 391235d4d..57be322d4 100644 --- a/include/caffe/layers/mkldnn_layers.hpp +++ b/include/caffe/layers/mkldnn_layers.hpp @@ -88,6 +88,9 @@ class MKLDNNBatchNormLayer : public MKLDNNLayer, public Layer { , const vector*>& bottom); virtual void Backward_gpu(const vector*>& top, const vector& propagate_down , const vector*>& bottom); +#ifdef USE_MLSL + virtual bool ParamNeedReduce(int param_id) { return param_id >= 3; } +#endif private: void InitBatchNorm(const vector*>& bottom, const vector*>& top); void InitBatchNormBwd(const vector*>& top, diff --git a/include/caffe/multinode/apply_mn_param.hpp b/include/caffe/multinode/apply_mn_param.hpp new file mode 100644 index 000000000..df48bd83e --- /dev/null +++ b/include/caffe/multinode/apply_mn_param.hpp @@ -0,0 +1,63 @@ +/* +All modification made by Intel Corporation: © 2016 Intel Corporation + +All contributions by the University of California: +Copyright (c) 2014, 2015, The Regents of the University of California (Regents) +All rights reserved. + +All other contributions: +Copyright (c) 2014, 2015, the respective contributors +All rights reserved. +For the list of contributors go to https://github.com/BVLC/caffe/blob/master/CONTRIBUTORS.md + + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _CAFFE_MULTINODE_APPLY_MN_PARAM_HPP_ +#define _CAFFE_MULTINODE_APPLY_MN_PARAM_HPP_ + +#ifdef USE_MLSL + +#include "caffe/proto/caffe.pb.h" + +namespace caffe { +/** + * @brief Apply the multinode parameters to the NetParameter + * inserting mn_activation layer if needed. 
+ */ +template +void ApplyMultinodeParams(const NetParameter& param, + NetParameter* param_with_mn); + +/** + * @brief Revert all the multinode changes from NetParameter + */ +template +void RevertMultinodeParams(NetParameter* param, bool write_diff = false); +} + +#endif // USE_MLSL + +#endif // _CAFFE_MULTINODE_APPLY_MN_PARAM_HPP_ diff --git a/include/caffe/multinode/mlsl.hpp b/include/caffe/multinode/mlsl.hpp index 519045e9d..b0851e9ee 100644 --- a/include/caffe/multinode/mlsl.hpp +++ b/include/caffe/multinode/mlsl.hpp @@ -41,6 +41,7 @@ #ifdef USE_MLSL #include +#include "caffe/common.hpp" namespace caffe { namespace mn { @@ -61,11 +62,21 @@ namespace caffe { return MLSL::Environment::GetEnv().GetProcessCount(); } + inline int get_group_id(int data_parts, int model_parts) { + int node_id = get_node_id(); + int num_nodes = get_nodes_count(); + return (node_id % (num_nodes / data_parts)) / model_parts; + } + inline bool is_multinode() { static bool multinode{ get_nodes_count() > 1 }; return multinode; } + inline bool is_root() { + return mn::get_node_id() == 0; + } + namespace detail { template inline MLSL::DataType dtype(); @@ -90,8 +101,16 @@ namespace caffe { Distribution & operator = (const Distribution &) = delete; Distribution(const Distribution &) = delete; - Distribution(int dataParts, int modelParts) - : distrib_{ MLSL::Environment::GetEnv().CreateDistribution(dataParts, modelParts) } { + Distribution(int dataParts, int modelParts, int dataColor = -1, int modelColor = -1, + int dataColorMax = -1, int modelColorMax = -1) : + data_parts_(dataParts), model_parts_(modelParts), + data_color_(dataColor), model_color_(modelColor), + data_color_max_(dataColorMax), model_color_max_(modelColorMax) { + if (dataColor == -1 || modelColor == -1) { + distrib_ = MLSL::Environment::GetEnv().CreateDistribution(dataParts, modelParts); + } else { + distrib_ = MLSL::Environment::GetEnv().CreateDistributionWithColors(dataColor, modelColor); + } } ~Distribution() { 
MLSL::Environment::GetEnv().DeleteDistribution(distrib_); @@ -100,71 +119,134 @@ namespace caffe { return distrib_; } template - void reduce(Dtype *buffer, size_t count, size_t rootIdx) { - MLSL::CommReq *rqts = distrib_->Reduce((void *)buffer, count, detail::dtype(), Rtype, rootIdx, Gtype); + void reduce(Dtype *sendBuffer, Dtype *recvBuffer, size_t count, size_t rootIdx = 0) { + if (skip_comm(Gtype)) return; + MLSL::CommReq *rqts = distrib_->Reduce((void *)sendBuffer, (void*)recvBuffer, count, detail::dtype(), Rtype, rootIdx, Gtype); MLSL::Environment::GetEnv().Wait(rqts); } template - void bcast(Dtype *buffer, size_t count, int rootId) { + void bcast(Dtype *buffer, size_t count, int rootId = 0) { + if (skip_comm(Gtype)) return; MLSL::CommReq *rqts = distrib_->Bcast((void *)buffer, count, detail::dtype(), rootId, Gtype); MLSL::Environment::GetEnv().Wait(rqts); } template void allreduce(Dtype *sendBuffer, Dtype *recvBuffer, size_t count) { + if (skip_comm(Gtype)) return; MLSL::CommReq *rqts = distrib_->AllReduce((void *)sendBuffer, (void *)recvBuffer, count, detail::dtype(), Rtype, Gtype); MLSL::Environment::GetEnv().Wait(rqts); } template void allreduce(Dtype *buffer, size_t count) { + if (skip_comm(Gtype)) return; MLSL::CommReq *rqts = distrib_->AllReduce((void *)buffer, (void *)buffer, count, detail::dtype(), Rtype, Gtype); MLSL::Environment::GetEnv().Wait(rqts); } template - void gather(const Dtype *sendBuffer, size_t count, Dtype *recvBuffer, size_t rootIdx) { + void gather(const Dtype *sendBuffer, size_t count, Dtype *recvBuffer, size_t rootIdx = 0) { + if (skip_comm(Gtype)) return; MLSL::CommReq *rqts = distrib_->Gather((void *)sendBuffer, count, (void *)recvBuffer, detail::dtype(), rootIdx, Gtype); MLSL::Environment::GetEnv().Wait(rqts); } template - void scatter(Dtype *sendBuffer, Dtype *recvBuffer, size_t count, size_t rootIdx) { + void scatter(Dtype *sendBuffer, Dtype *recvBuffer, size_t count, size_t rootIdx = 0) { + if (skip_comm(Gtype)) return; 
MLSL::CommReq *rqts = distrib_->Scatter((void *)sendBuffer, (void *)recvBuffer, count, detail::dtype(), rootIdx, Gtype); MLSL::Environment::GetEnv().Wait(rqts); } + template + void reducescatter(Dtype *sendBuffer, Dtype *recvBuffer, size_t count) { + if (skip_comm(Gtype)) return; + MLSL::CommReq *rqts = distrib_->ReduceScatter(sendBuffer, recvBuffer, count, detail::dtype(), Rtype, Gtype); + MLSL::Environment::GetEnv().Wait(rqts); + } + template + void allgather(Dtype *sendBuffer, size_t count, Dtype *recvBuffer) { + if (skip_comm(Gtype)) return; + // TODO: support allgather from MLSL + gather(sendBuffer, count, recvBuffer); + size_t bcast_count = count; + switch (Gtype) { + case MLSL::GT_MODEL: + bcast_count *= model_parts_; + break; + case MLSL::GT_DATA: + bcast_count *= data_parts_; + break; + case MLSL::GT_GLOBAL: + bcast_count *= model_parts_ * data_parts_; + break; + default: + NOT_IMPLEMENTED; + } + bcast(recvBuffer, bcast_count); + } template void barrier() { + if (skip_comm(Gtype)) return; distrib_->Barrier(Gtype); } + inline int get_data_parts() { + return data_parts_; + } + inline int get_model_parts() { + return model_parts_; + } + inline int get_group_id() { + return mn::get_group_id(data_parts_, model_parts_); + } private: + inline bool skip_comm(MLSL::GroupType Gtype) { + if (Gtype == MLSL::GT_DATA && data_color_max_ != -1) { + return data_color_ > data_color_max_; + } else if (Gtype == MLSL::GT_MODEL && model_color_max_ != -1) { + return model_color_ > model_color_max_; + } else return get_group_id() > 0; + } + MLSL::Distribution *distrib_{ nullptr }; + int data_parts_; + int model_parts_; + int data_color_; + int model_color_; + int data_color_max_; + int model_color_max_; }; - inline Distribution & get_distrib() { - static Distribution distrib{ get_nodes_count(), 1 }; - return distrib; + inline void GetCanonicalMnParam(int &num_nodes, int &model_parts) { + if (num_nodes == 0) num_nodes = mn::get_nodes_count(); + if (model_parts == 0 || model_parts 
> num_nodes) model_parts = num_nodes; } + shared_ptr create_distrib( + int dataParts, int modelParts, int dataColor = -1, int modelColor = -1, + int dataColorMax = -1, int modelColorMax = -1); + Distribution * get_distrib(int dataParts, int modelParts); + Distribution * get_distrib(); + template inline void allreduce(Dtype *sendBuffer, Dtype *recvBuffer, size_t count) { - get_distrib().allreduce(sendBuffer, recvBuffer, count); + get_distrib()->allreduce(sendBuffer, recvBuffer, count); } template inline void allreduce(Dtype *buffer, size_t count) { - get_distrib().allreduce(buffer, count); + get_distrib()->allreduce(buffer, count); } template inline void reduce(Dtype *buffer, size_t count, int rootId = 0) { - get_distrib().reduce(buffer, count, rootId); + get_distrib()->reduce(buffer, count, rootId); } template void bcast(Dtype *buffer, size_t count, int rootId = 0) { - get_distrib().bcast(buffer, count, rootId); + get_distrib()->bcast(buffer, count, rootId); } template inline void gather(const Dtype *sendBuffer, size_t count, Dtype *recvBuffer, int rootId = 0) { - get_distrib().gather(sendBuffer, count, recvBuffer, rootId); + get_distrib()->gather(sendBuffer, count, recvBuffer, rootId); } template inline void scatter(Dtype *sendBuffer, Dtype *recvBuffer, size_t count, int rootId = 0) { - get_distrib().scatter(sendBuffer, recvBuffer, count, rootId); + get_distrib()->scatter(sendBuffer, recvBuffer, count, rootId); } /* */ @@ -218,7 +300,7 @@ namespace caffe { return session; } - inline MLSL::Operation * add_operation(MLSL::OperationRegInfo* opRegInfo, MLSL::Distribution* distrib = get_distrib()) { + inline MLSL::Operation * add_operation(MLSL::OperationRegInfo* opRegInfo, MLSL::Distribution* distrib = *get_distrib()) { return get_session().add_operation(opRegInfo, distrib); } diff --git a/include/caffe/multinode/mn_activation_layer.hpp b/include/caffe/multinode/mn_activation_layer.hpp new file mode 100644 index 000000000..5ab278186 --- /dev/null +++ 
b/include/caffe/multinode/mn_activation_layer.hpp @@ -0,0 +1,103 @@ +/* +All modification made by Intel Corporation: © 2016 Intel Corporation + +All contributions by the University of California: +Copyright (c) 2014, 2015, The Regents of the University of California (Regents) +All rights reserved. + +All other contributions: +Copyright (c) 2014, 2015, the respective contributors +All rights reserved. +For the list of contributors go to https://github.com/BVLC/caffe/blob/master/CONTRIBUTORS.md + + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef CAFFE_MN_ACTIVATION_LAYER_HPP_ +#define CAFFE_MN_ACTIVATION_LAYER_HPP_ + +#ifdef USE_MLSL + +#include + +#include "caffe/blob.hpp" +#include "caffe/layer.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +template +class MnActivationLayer : public Layer { + public: + explicit MnActivationLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "MnActivation"; } + virtual inline int MinBottomBlobs() const { return 1; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + virtual bool Bypass(const vector*>& bottom, + const vector*>& top); + + private: + void Pack(const Dtype *src, Dtype *dst, int N, int C, int HW, int numC); + void Unpack(const Dtype *src, int N, int C, int HW, int numC, Dtype *dst); + bool Backward_cpu_fast(const vector*>& top, + const vector*>& bottom); + + int num_nodes_in_; + int num_nodes_out_; + int model_parts_in_; + int model_parts_out_; + int data_parts_in_; + int data_parts_out_; + mn::Distribution *distrib_in_; + mn::Distribution *distrib_out_; + shared_ptr distrib_data_in_out_; + Blob top_reduce_buf_; + Blob bottom_gather_buf_; + Blob bottom_gather_work_buf_; +}; + +} // namespace caffe + +#endif + +#endif // CAFFE_MN_ACTIVATION_LAYER_HPP_ diff --git a/include/caffe/multinode/multi_sync.hpp b/include/caffe/multinode/multi_sync.hpp index d20cd0a79..b6e29c57b 100644 --- a/include/caffe/multinode/multi_sync.hpp +++ 
b/include/caffe/multinode/multi_sync.hpp @@ -65,20 +65,18 @@ namespace caffe { #define CAN_USE_PRV(param) false //(param->prv_diff() && (param->prv_diff_count() == param->count())) - inline bool is_root() { - return mn::get_node_id() == 0; - } - template class MultiSync : public MultiSolver::Callback { boost::shared_ptr> solver; - int snapshot_per_iters; vector>> layers; shared_ptr> net; const vector *> &net_params; vector> layer_param_ids; + // layer_id -> blob_id -> cached blob to restore + // statistics + vector>>> cached_stats; #ifdef PERFORMANCE_MONITORING #define STATS_OUTPUT_FILE "mlsl_stats.txt" @@ -106,18 +104,53 @@ namespace caffe { virtual ~MultiSync() { } - void snapshot() { - if (is_root()) { - solver->root_solver()->Snapshot(); + void synchronize_parameters() { + LOG(INFO) << "synchronize_params: bcast"; + for (int i = 0; i < layers.size(); i++) { + mn::Distribution &distrib = layers[i]->GetDistribution(); + for (int j = 0; j < layer_param_ids[i].size(); j++) { + int layer_param_id = layer_param_ids[i][j]; + distrib.bcast( + net_params[layer_param_id]->mutable_cpu_data(), + net_params[layer_param_id]->count()); + } } } - void synchronize_parameters() { - LOG(WARNING) << "synchronize_params: bcast"; - for (int idx = 0; idx < net_params.size(); ++idx) { - mn::bcast(net_params[idx]->mutable_cpu_data(), net_params[idx]->count()); + void synchronize_statistics() { + cached_stats.resize(layers.size()); + for (int i = 0; i < layers.size(); i++) { + if (string(layers[i]->type()) == "BatchNorm" && + !layers[i]->layer_param().batch_norm_param().use_global_stats()) { + vector>> cached_blobs; + // 3 blobs: mean, variance and scaling factor + for (int j = 0; j < layer_param_ids[i].size() && j < 3; j++) { + shared_ptr> b = shared_ptr>(new Blob()); + Blob *net_param = net_params[layer_param_ids[i][j]]; + b->ReshapeLike(*net_param); + b->CopyFrom(*net_param); + cached_blobs.push_back(b); + mn::Distribution &distrib = layers[i]->GetDistribution(); + distrib.allreduce( 
+ net_param->mutable_cpu_data(), net_param->mutable_cpu_data(), + net_param->count()); + } + cached_stats[i] = cached_blobs; + } } + } + void restore_statistics() { + for (int i = 0; i < layers.size(); i++) { + if (string(layers[i]->type()) == "BatchNorm" && + !layers[i]->layer_param().batch_norm_param().use_global_stats()) { + // 3 blobs: mean, variance and scaling factor + for (int j = 0; j < layer_param_ids[i].size() && j < 3; j++) { + Blob *net_param = net_params[layer_param_ids[i][j]]; + net_param->CopyFrom(*cached_stats[i][j]); + } + } + } } void run() { @@ -151,14 +184,6 @@ namespace caffe { #endif } - void check_snapshot() { - if (is_root()) { - if ((snapshot_per_iters != 0) && (solver->root_solver()->iter() % snapshot_per_iters == 0)) { - solver->root_solver()->Snapshot(); - } - } - } - void apply_updates(int layer_id) { std::vector ¶m_ids = layer_param_ids[layer_id]; for (int i = 0; i < param_ids.size(); ++i) { @@ -167,7 +192,6 @@ namespace caffe { } void on_start() { - check_snapshot(); DLOG(INFO) << "started iteration " << solver->root_solver()->iter(); } @@ -176,6 +200,7 @@ namespace caffe { if (layer->layerOp == nullptr) { return; } + std::vector ¶m_ids = layer_param_ids[layer_id]; for (int i = 0; i < param_ids.size(); ++i) { if (CAN_USE_PRV(net_params[param_ids[i]])) { @@ -191,28 +216,28 @@ namespace caffe { if (layer->layerOp == nullptr) { return; } + std::vector ¶m_ids = layer_param_ids[layer_id]; - for (int i = 0; i < param_ids.size(); ++i) { + for (int i=0; ilayerOp->GetParameterSet(i)->WaitGradientComm()}; if (delwt_buf) { if (CAN_USE_PRV(net_params[param_ids[i]])) { if (delwt_buf != net_params[param_ids[i]]->prv_diff()) caffe_copy(net_params[param_ids[i]]->count(), - delwt_buf, - net_params[param_ids[i]]->mutable_prv_diff()); + delwt_buf, + net_params[param_ids[i]]->mutable_prv_diff()); } else if (delwt_buf != net_params[param_ids[i]]->cpu_diff()) caffe_copy(net_params[param_ids[i]]->count(), - delwt_buf, - 
net_params[param_ids[i]]->mutable_cpu_diff()); - + delwt_buf, + net_params[param_ids[i]]->mutable_cpu_diff()); } } } void on_gradients_ready() { DLOG(INFO) << "finished iteration " << solver->root_solver()->iter(); - + #ifdef PERFORMANCE_MONITORING caffe::mn::train::stats::stop(); @@ -240,6 +265,23 @@ namespace caffe { #endif //PERFORMANCE_MONITORING } + void on_before_test() { + synchronize_statistics(); + synchronize_parameters(); + } + + void on_after_test() { + restore_statistics(); + } + + void on_before_snapshot() { + synchronize_statistics(); + } + + void on_after_snapshot() { + restore_statistics(); + } + #ifdef PERFORMANCE_MONITORING void dump_stats_to_file() { FILE* outputFile = fopen(STATS_OUTPUT_FILE, "w"); diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index 68989adb2..05413a6c9 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -120,6 +120,13 @@ class Solver { virtual void on_start() = 0; virtual void on_gradients_ready() = 0; +#ifdef USE_MLSL + virtual void on_before_test() {} + virtual void on_after_test() {} + virtual void on_before_snapshot() {} + virtual void on_after_snapshot() {} +#endif + template friend class Solver; }; diff --git a/src/caffe/layer.cpp b/src/caffe/layer.cpp index 67d8df327..2a88ea099 100644 --- a/src/caffe/layer.cpp +++ b/src/caffe/layer.cpp @@ -59,6 +59,64 @@ void Layer::Unlock() { } } +#ifdef USE_MLSL +template +mn::Distribution & Layer::GetDistribution() { + const MultinodeLayerParameter &mn_layer_param = layer_param_.multinode(); + int num_nodes = mn_layer_param.num_nodes(); + int model_parts = mn_layer_param.model_parts(); + mn::GetCanonicalMnParam(num_nodes, model_parts); + return *mn::get_distrib(num_nodes/model_parts, model_parts); +} + +template +bool Layer::Bypass(const vector*>& bottom, + const vector*>& top) { + int num_nodes = layer_param_.multinode().num_nodes(); + int model_parts = layer_param_.multinode().model_parts(); + mn::GetCanonicalMnParam(num_nodes, model_parts); + 
int data_parts = num_nodes / model_parts; + return mn::get_group_id(data_parts, model_parts) > 0; +} + +template +void Layer::MultinodeSetUp(const vector*>& bottom, + const vector*>& top) { + if (this->layerOp != NULL || this->phase_ != TRAIN || Bypass(bottom, top)) { + return; + } + + int num_nodes = layer_param_.multinode().num_nodes(); + int model_parts = layer_param_.multinode().model_parts(); + mn::GetCanonicalMnParam(num_nodes, model_parts); + int data_parts = num_nodes / model_parts; + + if (data_parts <= 1) return; + + // We only initialize data parallelism here so operation type is + // irrelevant here, hard-code to OT_CC + mn::OpRegInfo reg_info(mn::train::get_session(), MLSL::OT_CC); + reg_info.set_name(this->layer_param().name()); + bool has_parameters = false; + for (int i = 0; i < this->blobs_.size(); i++) { + if (!ParamNeedReduce(i)) continue; + int hw = 1, ic = 1, oc = 1; + const vector &shape = this->blobs_[i]->shape(); + CHECK_GT(shape.size(), 0); + oc = shape[0]; + if (shape.size() > 1) ic = shape[1]; + if (shape.size() >= 4) hw = shape[2] * shape[3]; + // Note that MLSL expects the entire weights from a model group. + // So we should multiply by model_parts here. 
+ reg_info.add_parameter_set(ic * oc * model_parts, hw); + has_parameters = true; + } + if (has_parameters) { + this->layerOp = mn::train::add_operation(reg_info, this->GetDistribution()); + } +} +#endif + INSTANTIATE_CLASS(Layer); } // namespace caffe diff --git a/src/caffe/layers/base_conv_layer.cpp b/src/caffe/layers/base_conv_layer.cpp index ca91428a9..daed130aa 100644 --- a/src/caffe/layers/base_conv_layer.cpp +++ b/src/caffe/layers/base_conv_layer.cpp @@ -292,19 +292,6 @@ void BaseConvolutionLayer::DoReshape(const vector*>& bottom, caffe_set(bias_multiplier_.count(), Dtype(1), bias_multiplier_.mutable_cpu_data()); } - -#ifdef USE_MLSL - if ((this->layerOp == nullptr) && (this->phase_ == TRAIN)){ - mn::OpRegInfo reg_info{ mn::train::get_session(), MLSL::OT_CC }; - reg_info.set_name(this->layer_param().name()); - reg_info.add_parameter_set(bottom[0]->channels() * top[0]->channels() / group_, - this->kernel_shape_.cpu_data()[0] * this->kernel_shape_.cpu_data()[1]); - if (bias_term_) { - reg_info.add_parameter_set(top[0]->channels(), 1); - } - this->layerOp = mn::train::add_operation(reg_info); - } -#endif /* USE_MLSL */ } template diff --git a/src/caffe/layers/batch_norm_layer.cpp b/src/caffe/layers/batch_norm_layer.cpp index edd07ee16..b7746d988 100644 --- a/src/caffe/layers/batch_norm_layer.cpp +++ b/src/caffe/layers/batch_norm_layer.cpp @@ -90,7 +90,7 @@ void BatchNormLayer::Reshape(const vector*>& bottom, sz[0]=bottom[0]->shape(0); batch_sum_multiplier_.Reshape(sz); - int spatial_dim = bottom[0]->count()/(channels_*bottom[0]->shape(0)); + int spatial_dim = bottom[0]->count(2); if (spatial_sum_multiplier_.num_axes() == 0 || spatial_sum_multiplier_.shape(0) != spatial_dim) { sz[0] = spatial_dim; diff --git a/src/caffe/layers/bias_layer.cpp b/src/caffe/layers/bias_layer.cpp index 3d9a3b4f9..40c17a511 100644 --- a/src/caffe/layers/bias_layer.cpp +++ b/src/caffe/layers/bias_layer.cpp @@ -71,15 +71,6 @@ void BiasLayer::LayerSetUp(const vector*>& bottom, 
filler->Fill(this->blobs_[0].get()); } this->param_propagate_down_.resize(this->blobs_.size(), true); - -#ifdef USE_MLSL - if ((this->layerOp == nullptr) && (this->phase_ == TRAIN)) { - mn::OpRegInfo reg_info(mn::train::get_session(), MLSL::OT_BIAS); - reg_info.set_name(this->layer_param_.name()); - reg_info.add_parameter_set(bottom[0]->channels(), 1, false); - this->layerOp = mn::train::add_operation(reg_info); - } -#endif /* USE_MLSL */ } template diff --git a/src/caffe/layers/inner_product_layer.cpp b/src/caffe/layers/inner_product_layer.cpp index 9622b0213..ca9534a84 100644 --- a/src/caffe/layers/inner_product_layer.cpp +++ b/src/caffe/layers/inner_product_layer.cpp @@ -90,18 +90,6 @@ void InnerProductLayer::LayerSetUp(const vector*>& bottom, } // parameter initialization this->param_propagate_down_.resize(this->blobs_.size(), true); -#ifdef USE_MLSL - if ((this->layerOp == nullptr) && (this->phase_ == TRAIN)) { - mn::OpRegInfo reg_info{ mn::train::get_session(), MLSL::OT_CC }; - reg_info.set_name(this->layer_param().name()); - reg_info.add_parameter_set(bottom[0]->count(axis) * N_, 1); - if (bias_term_) { - reg_info.add_parameter_set(1 * N_, 1); - } - this->layerOp = mn::train::add_operation(reg_info); - } -#endif /* USE_MLSL */ - } template diff --git a/src/caffe/layers/mkl_convolution_layer.cpp b/src/caffe/layers/mkl_convolution_layer.cpp index 86b73ef25..59c74102c 100644 --- a/src/caffe/layers/mkl_convolution_layer.cpp +++ b/src/caffe/layers/mkl_convolution_layer.cpp @@ -324,19 +324,6 @@ void MKLConvolutionLayer::Init( bwdb_bias_diff_iter->create_layouts(convolutionBwdBias, dnnResourceDiffBias, 1, bias_sizes, bias_strides); } - -#ifdef USE_MLSL - if ((this->layerOp == nullptr) && (this->phase_ == TRAIN)) { - mn::OpRegInfo reg_info{mn::train::get_session(), MLSL::OT_CC}; - reg_info.set_name(this->layer_param_.name()); - reg_info.add_parameter_set(ic * oc / g, kw * kh); - if (this->bias_term_) { - reg_info.add_parameter_set(oc, 1); - } - this->layerOp = 
mn::train::add_operation(reg_info); - } -#endif /* USE_MLSL */ - } template diff --git a/src/caffe/layers/mkldnn_convolution_layer.cpp b/src/caffe/layers/mkldnn_convolution_layer.cpp index fa8e7fc15..649224f0a 100644 --- a/src/caffe/layers/mkldnn_convolution_layer.cpp +++ b/src/caffe/layers/mkldnn_convolution_layer.cpp @@ -93,18 +93,6 @@ void MKLDNNConvolutionLayer::init_properties(const vector*>& this->pad_h_ = this->pad_.cpu_data()[0]; this->kernel_w_ = this->kernel_shape_.cpu_data()[1]; this->kernel_h_ = this->kernel_shape_.cpu_data()[0]; - -#ifdef USE_MLSL - if ((this->layerOp == nullptr) && (this->phase_ == TRAIN)) { - mn::OpRegInfo reg_info{ mn::train::get_session(), MLSL::OT_CC }; - reg_info.set_name(this->layer_param_.name()); - reg_info.add_parameter_set(this->channels_ * this->num_output_ / std::max(this->group_, 1), this->kernel_w_ * this->kernel_h_); - if (this->bias_term_) { - reg_info.add_parameter_set(this->num_output_, 1); - } - this->layerOp = mn::train::add_operation(reg_info); - } -#endif /* USE_MLSL */ } template diff --git a/src/caffe/multinode/apply_mn_param.cpp b/src/caffe/multinode/apply_mn_param.cpp new file mode 100644 index 000000000..1e406dc45 --- /dev/null +++ b/src/caffe/multinode/apply_mn_param.cpp @@ -0,0 +1,315 @@ +/* +All modification made by Intel Corporation: © 2016 Intel Corporation + +All contributions by the University of California: +Copyright (c) 2014, 2015, The Regents of the University of California (Regents) +All rights reserved. + +All other contributions: +Copyright (c) 2014, 2015, the respective contributors +All rights reserved. +For the list of contributors go to https://github.com/BVLC/caffe/blob/master/CONTRIBUTORS.md + + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef USE_MLSL + +#include +#include +#include + +#include "caffe/common.hpp" +#include "caffe/blob.hpp" +#include "caffe/util/math_functions.hpp" +#include "caffe/multinode/mlsl.hpp" +#include "caffe/multinode/apply_mn_param.hpp" + +namespace caffe { + +template +void ApplyMultinodeParams(const NetParameter& param, + NetParameter* param_with_mn) { + // save per-layer global parameter mapping being applied later + map net_layer_params; + // aux map for inserting MnActivationLayer + map blob_param_map; + MultinodeParameter mn_param = param.multinode(); + + // Step 1: Identify all the layers having global net params + for (int param_id = 0; param_id < mn_param.model_parallel_size(); param_id++) { + MnModelParallelParameter model_parallel_param = mn_param.model_parallel(param_id); + string layer_from = model_parallel_param.layer_from(); + string layer_to = model_parallel_param.layer_to(); + set marked_blobs; + for (int i = 0; i < param.layer_size(); i++) { + const LayerParameter& layer_param = param.layer(i); + bool layer_covered_by_global = false; + if (layer_param.name() == layer_from || + layer_param.name() == layer_to) { + layer_covered_by_global = true; + } else { + for (int j = 0; j < layer_param.bottom_size(); j++) { + if (marked_blobs.find(layer_param.bottom(j)) != + marked_blobs.end()) { + layer_covered_by_global = true; + break; + } + } + } + if (layer_covered_by_global) { + for (int j = 0; j < layer_param.top_size(); j++) { + marked_blobs.insert(layer_param.top(j)); + } + net_layer_params[layer_param.name()] = model_parallel_param; + } + if (layer_param.name() == layer_to || + layer_param.top_size() == 0) { + break; + } + } + } + + // Step 2: Identify the places to insert activation layers + map blob_mdg_map; + for (int i = 0; i < param.layer_size(); i++) { + const LayerParameter& layer_param = param.layer(i); + string layer_name = layer_param.name(); + string layer_type = layer_param.type(); + const MultinodeLayerParameter& mn_layer_param = 
layer_param.multinode(); + int num_nodes = mn_layer_param.num_nodes(); + int model_parts = mn_layer_param.model_parts(); + if (net_layer_params.find(layer_name) != net_layer_params.end()) { + MnModelParallelParameter model_parallel_param = + net_layer_params[layer_name]; + num_nodes = model_parallel_param.num_nodes(); + model_parts = model_parallel_param.model_parts(); + } + for (int j = 0; j < layer_param.bottom_size(); j++) { + string bottom_name = layer_param.bottom(j); + if (blob_mdg_map.find(bottom_name) != blob_mdg_map.end()) { + MnActivationParameter mdg = blob_mdg_map[bottom_name]; + mdg.set_num_nodes_out(num_nodes); + mdg.set_model_parts_out(model_parts); + int num_nodes_in = mdg.num_nodes_in(); + int num_nodes_out = mdg.num_nodes_out(); + int model_parts_in = mdg.model_parts_in(); + int model_parts_out = mdg.model_parts_out(); + mn::GetCanonicalMnParam(num_nodes_in, model_parts_in); + mn::GetCanonicalMnParam(num_nodes_out, model_parts_out); + if ((model_parts_out > 1 && + (layer_type == "Convolution" || layer_type == "InnerProduct" || + layer_type == "Accuracy" || layer_type == "SoftmaxWithLoss")) || + num_nodes_in != num_nodes_out || + model_parts_in != model_parts_out) { + string layer_blob_name = layer_name + "/" + layer_param.bottom(j); + if (layer_type == "Accuracy" || layer_type == "SoftmaxWithLoss") { + mdg.set_need_reduce(false); + } + blob_param_map[layer_blob_name] = mdg; + } + blob_mdg_map.erase(bottom_name); + } + } + for (int j = 0; j < layer_param.top_size(); j++) { + MnActivationParameter mdg; + mdg.set_num_nodes_in(num_nodes); + mdg.set_model_parts_in(model_parts); + blob_mdg_map[layer_param.top(j)] = mdg; + } + } + + // Step 3: Create the new net, apply global mn setting to each layer, + // insert activation layers if needed + param_with_mn->CopyFrom(param); + param_with_mn->clear_layer(); + for (int i = 0; i < param.layer_size(); i++) { + const LayerParameter& orig_layer_param = param.layer(i); + map updated_blob_idx_to_name; + for (int 
j = 0; j < orig_layer_param.bottom_size(); j++) { + const string& bottom_blob_name = orig_layer_param.bottom(j); + string layer_blob_name = orig_layer_param.name() + "/" + bottom_blob_name; + if (blob_param_map.find(layer_blob_name) != blob_param_map.end()) { + LayerParameter* mn_activation_layer_param = + param_with_mn->add_layer(); + string new_name = "mn_activation/" + layer_blob_name; + mn_activation_layer_param->Clear(); + mn_activation_layer_param->set_name(new_name); + mn_activation_layer_param->set_type("MnActivation"); + mn_activation_layer_param->add_bottom(bottom_blob_name); + mn_activation_layer_param->add_top(new_name); + MnActivationParameter *mn_activation_param = + mn_activation_layer_param->mutable_mn_activation_param(); + *mn_activation_param = blob_param_map[layer_blob_name]; + updated_blob_idx_to_name[j] = new_name; + } + } + LayerParameter* layer_param = param_with_mn->add_layer(); + layer_param->CopyFrom(orig_layer_param); + // Apply global mn setting + if (net_layer_params.find(layer_param->name()) != net_layer_params.end()) { + MultinodeLayerParameter *mn_layer_param = layer_param->mutable_multinode(); + const MnModelParallelParameter &mn_param = net_layer_params[layer_param->name()]; + mn_layer_param->set_num_nodes(mn_param.num_nodes()); + mn_layer_param->set_model_parts(mn_param.model_parts()); + } + const MultinodeLayerParameter &mn_layer_param = layer_param->multinode(); + int num_nodes = mn_layer_param.num_nodes(); + int model_parts = mn_layer_param.model_parts(); + mn::GetCanonicalMnParam(num_nodes, model_parts); + if (model_parts > 1) { + // TODO: support transpose + // TODO: support undividible num_output + if (layer_param->type() == "Convolution") { + ConvolutionParameter *conv_param = layer_param->mutable_convolution_param(); + int new_num_output = conv_param->num_output() / model_parts; + CHECK_EQ(conv_param->num_output(), model_parts * new_num_output) + << "Convolution layer " << layer_param->name() + << ": Undividible num_output 
" << conv_param->num_output() + << " by model_parts " << model_parts; + conv_param->set_num_output(new_num_output); + } else if (layer_param->type() == "InnerProduct") { + InnerProductParameter *ip_param = layer_param->mutable_inner_product_param(); + int new_num_output = ip_param->num_output() / model_parts; + CHECK_EQ(ip_param->num_output(), model_parts * new_num_output) + << "InnerProduct layer " << layer_param->name() + << ": Undividible num_output " << ip_param->num_output() + << " by model_parts " << model_parts; + ip_param->set_num_output(ip_param->num_output() / model_parts); + CHECK(!ip_param->transpose()) << "Model parallelism does not support transpose!"; + } + for (int j = 0; j < layer_param->blobs_size(); j++) { + Blob blob; + Blob new_blob; + const BlobProto &proto = layer_param->blobs(j); + blob.FromProto(proto); + vector shape = blob.shape(); + new_blob.Reshape(shape); + if (shape.size() > 0) { + if (proto.has_num() || proto.has_channels() || + proto.has_height() || proto.has_width()) { + // deprecated 4D blob + if (layer_param->type() == "InnerProduct") { + CHECK_EQ(shape.size(), 4); + CHECK_EQ(shape[0], 1); + CHECK_EQ(shape[1], 1); + if (shape[2] == 1) { + shape.resize(1); + shape[0] = blob.shape(3); + } else { + shape.resize(2); + shape[0] = blob.shape(2); + shape[1] = blob.shape(3); + } + new_blob.Reshape(shape); + } + } + int count = blob.count() / model_parts; + int offset = count * (mn::get_node_id() % model_parts); + shape[0] /= model_parts; + new_blob.Reshape(shape); + caffe_copy(count, blob.cpu_data() + offset, new_blob.mutable_cpu_data()); + caffe_copy(count, blob.cpu_diff() + offset, new_blob.mutable_cpu_diff()); + BlobProto *updated_blob_proto = layer_param->mutable_blobs(j); + updated_blob_proto->Clear(); + new_blob.ToProto(updated_blob_proto, true); + } + } + } + for (int j = 0; j < orig_layer_param.bottom_size(); j++) { + if (updated_blob_idx_to_name.find(j) != updated_blob_idx_to_name.end()) { + layer_param->set_bottom(j, 
updated_blob_idx_to_name[j]); + } + } + } +} + +template +void RevertMultinodeParams(NetParameter* param, bool write_diff) { + NetParameter orig_param; + orig_param.CopyFrom(*param); + param->clear_layer(); + for (int i = 0; i < orig_param.layer_size(); i++) { + const LayerParameter& orig_layer_param = orig_param.layer(i); + if (orig_layer_param.type() == "MnActivation") continue; + LayerParameter* layer_param = param->add_layer(); + layer_param->CopyFrom(orig_layer_param); + layer_param->clear_bottom(); + for (int j = 0; j < orig_layer_param.bottom_size(); j++) { + string bottom_name = orig_layer_param.bottom(j); + string prefix = "mn_activation/" + orig_layer_param.name() + "/"; + if (bottom_name.find(prefix) == 0) { + bottom_name = bottom_name.substr(prefix.size()); + } + layer_param->add_bottom(bottom_name); + } + const MultinodeLayerParameter &mn_layer_param = orig_layer_param.multinode(); + int num_nodes = mn_layer_param.num_nodes(); + int model_parts = mn_layer_param.model_parts(); + mn::GetCanonicalMnParam(num_nodes, model_parts); + if (model_parts > 1) { + if (layer_param->type() == "Convolution") { + ConvolutionParameter *conv_param = layer_param->mutable_convolution_param(); + conv_param->set_num_output(conv_param->num_output() * model_parts); + } else if (layer_param->type() == "InnerProduct") { + InnerProductParameter *ip_param = layer_param->mutable_inner_product_param(); + ip_param->set_num_output(ip_param->num_output() * model_parts); + CHECK(!ip_param->transpose()) << "Model parallelism does not support transpose!"; + } + layer_param->clear_blobs(); + for (int j = 0; j < orig_layer_param.blobs_size(); j++) { + BlobProto *blob_proto = layer_param->add_blobs(); + Blob orig_blob; + orig_blob.FromProto(orig_layer_param.blobs(j)); + vector shape = orig_blob.shape(); + Blob new_blob; + if (shape.size() > 0) { + mn::Distribution *distrib = mn::get_distrib(num_nodes/model_parts, model_parts); + int count = orig_blob.count(); + shape[0] *= model_parts; + 
new_blob.Reshape(shape); + distrib->allgather( + orig_blob.mutable_cpu_data(), count, new_blob.mutable_cpu_data()); + if (write_diff) { + distrib->allgather( + orig_blob.mutable_cpu_diff(), count, new_blob.mutable_cpu_diff()); + } + } + new_blob.ToProto(blob_proto, write_diff); + } + } + layer_param->mutable_multinode()->Clear(); + } +} + +template void ApplyMultinodeParams(const NetParameter& param, + NetParameter* param_with_mn); +template void ApplyMultinodeParams(const NetParameter& param, + NetParameter* param_with_mn); +template void RevertMultinodeParams(NetParameter* param, bool write_diff); +template void RevertMultinodeParams(NetParameter* param, bool write_diff); +} // namespace caffe + +#endif // USE_MLSL diff --git a/src/caffe/multinode/mlsl.cpp b/src/caffe/multinode/mlsl.cpp index e6a333847..1653c5692 100644 --- a/src/caffe/multinode/mlsl.cpp +++ b/src/caffe/multinode/mlsl.cpp @@ -37,7 +37,9 @@ #ifdef USE_MLSL -#include +#include +#include "boost/thread/mutex.hpp" +#include "caffe/multinode/mlsl.hpp" namespace { @@ -55,4 +57,37 @@ namespace { } } +namespace caffe { + namespace mn { + boost::mutex distrib_lock; + std::map, boost::shared_ptr> distrib_map; + + shared_ptr create_distrib( + int dataParts, int modelParts, int dataColor, int modelColor, + int dataColorMax, int modelColorMax) { + return shared_ptr( + new Distribution(dataParts, modelParts, dataColor, modelColor, + dataColorMax, modelColorMax)); + } + + Distribution * get_distrib(int dataParts, int modelParts) { + boost::mutex::scoped_lock l(distrib_lock); + std::pair key = std::make_pair(dataParts, modelParts); + if (distrib_map.find(key) == distrib_map.end()) { + int node_id = get_node_id(); + int num_nodes = get_nodes_count(); + int modelColor = node_id / modelParts; + int dataColor = node_id % (num_nodes / dataParts); + distrib_map[key] = boost::shared_ptr( + new Distribution(dataParts, modelParts, dataColor, modelColor)); + } + return distrib_map[key].get(); + } + + Distribution * 
get_distrib() { + return get_distrib(get_nodes_count(), 1); + } + } +} + #endif /* USE_MLSL */ diff --git a/src/caffe/multinode/mn_activation_layer.cpp b/src/caffe/multinode/mn_activation_layer.cpp new file mode 100644 index 000000000..ad37ec191 --- /dev/null +++ b/src/caffe/multinode/mn_activation_layer.cpp @@ -0,0 +1,253 @@ +/* +All modification made by Intel Corporation: © 2016 Intel Corporation + +All contributions by the University of California: +Copyright (c) 2014, 2015, The Regents of the University of California (Regents) +All rights reserved. + +All other contributions: +Copyright (c) 2014, 2015, the respective contributors +All rights reserved. +For the list of contributors go to https://github.com/BVLC/caffe/blob/master/CONTRIBUTORS.md + + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef USE_MLSL + +#include "caffe/multinode/mn_activation_layer.hpp" +#include "caffe/multinode/mlsl.hpp" + +namespace caffe { + +template +void MnActivationLayer::LayerSetUp(const vector*>& bottom, + const vector*>& top) { + MnActivationParameter param = this->layer_param_.mn_activation_param(); + num_nodes_in_ = param.num_nodes_in(); + num_nodes_out_ = param.num_nodes_out(); + model_parts_in_ = param.model_parts_in(); + model_parts_out_ = param.model_parts_out(); + mn::GetCanonicalMnParam(num_nodes_in_, model_parts_in_); + mn::GetCanonicalMnParam(num_nodes_out_, model_parts_out_); + data_parts_in_ = num_nodes_in_ / model_parts_in_; + data_parts_out_ = num_nodes_out_ / model_parts_out_; + + CHECK_EQ(num_nodes_in_, data_parts_in_ * model_parts_in_); + CHECK_EQ(num_nodes_out_, data_parts_out_ * model_parts_out_); + CHECK(data_parts_in_ != data_parts_out_ || + model_parts_in_ != model_parts_out_ || + model_parts_in_ > 1); + + distrib_in_ = mn::get_distrib(data_parts_in_, model_parts_in_); + distrib_out_ = mn::get_distrib(data_parts_out_, model_parts_out_); + + if (data_parts_in_ != data_parts_out_) { + int num_nodes = mn::get_nodes_count(); + int node_id = mn::get_node_id(); + int data_parts_max = std::max(data_parts_in_, data_parts_out_); + int data_parts_min = std::min(data_parts_in_, data_parts_out_); + int num_data_groups = num_nodes / data_parts_min; + // make sure data_color in-use starts from 0 and ends at data_parts_min-1 + int data_color = 
node_id / num_data_groups + + (node_id % (num_nodes / data_parts_max)) * data_parts_min; + LOG(INFO) << "Create data_in_out distribution: " + << data_parts_in_ << " ==> " << data_parts_out_ + << ", (" << data_parts_max / data_parts_min + << ",1), data color: " << data_color + << ", data color max: " << data_parts_min-1; + distrib_data_in_out_ = mn::create_distrib( + data_parts_max / data_parts_min, 1, data_color, -1, data_parts_min-1, -1); + } +} + +template +void MnActivationLayer::Reshape(const vector*>& bottom, + const vector*>& top) { + const vector &bottom_shape = bottom[0]->shape(); + vector top_shape = bottom[0]->shape(); + // re-group and distribute the data parts + top_shape[0] = bottom_shape[0] * data_parts_in_ / data_parts_out_; + if (top_shape.size() > 1) { + // gather all the model parts split from previous output + top_shape[1] = bottom_shape[1] * model_parts_in_; + } + top[0]->Reshape(top_shape); + top_reduce_buf_.ReshapeLike(*top[0]); + vector bottom_gather_shape = bottom[0]->shape(); + if (bottom_shape.size() > 1) { + bottom_gather_shape[1] = bottom_shape[1] * model_parts_in_; + } + bottom_gather_buf_.Reshape(bottom_gather_shape); + bottom_gather_work_buf_.Reshape(bottom_gather_shape); +} + +template +void MnActivationLayer::Forward_cpu(const vector*>& bottom, + const vector*>& top) { + Dtype *bottom_work_buf = (Dtype*)bottom[0]->cpu_data(); + if (model_parts_in_ > 1) { + distrib_in_->gather( + (Dtype*)bottom[0]->cpu_data(), bottom[0]->count(), + bottom_gather_buf_.mutable_cpu_data()); + if (data_parts_in_ == data_parts_out_) { + bottom_work_buf = top[0]->mutable_cpu_data(); + } else { + bottom_work_buf = bottom_gather_work_buf_.mutable_cpu_data(); + } + Unpack( + bottom_gather_buf_.cpu_data(), + bottom[0]->shape(0), bottom[0]->shape(1), bottom[0]->count(2), + model_parts_in_, + bottom_work_buf); + } + if (data_parts_in_ > data_parts_out_) { + distrib_data_in_out_->gather( + bottom_work_buf, bottom[0]->count() * model_parts_in_, + 
top[0]->mutable_cpu_data()); + } else if (data_parts_in_ < data_parts_out_) { + distrib_data_in_out_->scatter( + bottom_work_buf, top[0]->mutable_cpu_data(), + top[0]->count()); + } else { + if (bottom_work_buf != top[0]->mutable_cpu_data()) { + caffe_copy( + top[0]->count(), bottom_work_buf, top[0]->mutable_cpu_data()); + } + } + distrib_out_->bcast( + top[0]->mutable_cpu_data(), top[0]->count()); +} + +template +bool MnActivationLayer::Backward_cpu_fast(const vector*>& top, + const vector*>& bottom) { + if (num_nodes_in_ == num_nodes_out_ && + model_parts_in_ == model_parts_out_ && + model_parts_in_ > 1) { + Pack(top[0]->cpu_diff(), bottom_gather_work_buf_.mutable_cpu_data(), + bottom[0]->shape(0), bottom[0]->shape(1), bottom[0]->count(2), + model_parts_in_); + distrib_out_->reducescatter( + bottom_gather_work_buf_.mutable_cpu_data(), + bottom[0]->mutable_cpu_diff(), bottom[0]->count()); + return true; + } + return false; +} + +template +void MnActivationLayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + if (propagate_down[0]) { + if (Backward_cpu_fast(top, bottom)) return; + Dtype *top_work_buf = (Dtype*)top[0]->cpu_diff(); + if (model_parts_out_ > 1 && + this->layer_param_.mn_activation_param().need_reduce()) { + distrib_out_->reduce( + (Dtype*)top[0]->cpu_diff(), top_reduce_buf_.mutable_cpu_data(), + top_reduce_buf_.count()); + top_work_buf = top_reduce_buf_.mutable_cpu_data(); + } + Dtype *bottom_work_buf = bottom[0]->mutable_cpu_diff(); + if (model_parts_in_ > 1) { + bottom_work_buf = bottom_gather_buf_.mutable_cpu_data(); + } + if (data_parts_in_ > data_parts_out_) { + distrib_data_in_out_->scatter( + top_work_buf, bottom_work_buf, + bottom_gather_buf_.count()); + } else if (data_parts_in_ < data_parts_out_) { + distrib_data_in_out_->gather( + top_work_buf, top[0]->count(), + bottom_work_buf); + } else { + if (model_parts_in_ > 1) { + bottom_work_buf = top_work_buf; + } else { + caffe_copy( + 
bottom[0]->count(), top_work_buf, bottom_work_buf); + } + } + if (model_parts_in_ > 1) { + Pack(bottom_work_buf, bottom_gather_work_buf_.mutable_cpu_data(), + bottom[0]->shape(0), bottom[0]->shape(1), bottom[0]->count(2), + model_parts_in_); + distrib_in_->scatter( + bottom_gather_work_buf_.mutable_cpu_data(), + bottom[0]->mutable_cpu_diff(), bottom[0]->count()); + } + } +} + +template +void MnActivationLayer::Unpack(const Dtype *src, int N, int C, int HW, int numC, Dtype *dst) { + int dstC = numC * C; +#pragma omp parallel for collapse (2) + for (int iN = 0; iN < N; iN++) { + for (int iC = 0; iC < dstC; iC++) { + int iSrc = iC / C; + int iSrcC = iC % C; + for (int iHW = 0; iHW < HW; iHW++) { + dst[iN*dstC*HW + iC*HW + iHW] = + src[iSrc*N*C*HW + iN*C*HW + iSrcC*HW + iHW]; + } + } + } +} + +template +void MnActivationLayer::Pack(const Dtype *src, Dtype *dst, int N, int C, int HW, int numC) { + int srcC = numC * C; + for (int iDst = 0; iDst < numC; iDst++) { +#pragma omp parallel for collapse (2) + for (int iN = 0; iN < N; iN++) { + for (int iC = 0; iC < C; iC++) { + int iSrcC = iDst * C + iC; + for (int iHW = 0; iHW < HW; iHW++) { + dst[iDst*N*C*HW + iN*C*HW + iC*HW + iHW] = + src[iN*srcC*HW + iSrcC*HW + iHW]; + } + } + } + } +} + +template +bool MnActivationLayer::Bypass(const vector*>& bottom, + const vector*>& top) { + return distrib_in_->get_group_id() > 0 && distrib_out_->get_group_id() > 0; +} + +#ifdef CPU_ONLY +STUB_GPU(MnActivationLayer); +#endif + +INSTANTIATE_CLASS(MnActivationLayer); +REGISTER_LAYER_CLASS(MnActivation); +} // namespace caffe + +#endif + diff --git a/src/caffe/multinode/multi_solver.cpp b/src/caffe/multinode/multi_solver.cpp index 6961f4b1b..86e9b37ef 100644 --- a/src/caffe/multinode/multi_solver.cpp +++ b/src/caffe/multinode/multi_solver.cpp @@ -62,11 +62,11 @@ Dtype MultiSolver::ForwardBackwardImpl(bool first, bool last) { #endif /* CAFFE_PER_LAYER_TIMINGS */ net.ClearParamDiffs(); + for (int i = 0; i < layers.size(); ++i) { #ifdef 
CAFFE_PER_LAYER_TIMINGS timer.Start(); #endif - loss += net.ForwardFromTo(i, i); #ifdef CAFFE_PER_LAYER_TIMINGS @@ -82,7 +82,7 @@ Dtype MultiSolver::ForwardBackwardImpl(bool first, bool last) { if (!layer_need_backward[i]) { continue; } - + net.BackwardFromTo(i, i); if (last && (layers[i]->layerOp != nullptr) && layers[i]->layerOp->HasParameterSets()) { @@ -97,15 +97,14 @@ Dtype MultiSolver::ForwardBackwardImpl(bool first, bool last) { } if (last) { - for (int i = 0; i < layers.size(); ++i) { + for (int i = 0; i < layers.size(); ++i) { #ifdef CAFFE_PER_LAYER_TIMINGS timer.Start(); #endif - if (!layer_need_backward[i] || ((layers[i]->layerOp != nullptr) && !layers[i]->layerOp->HasParameterSets())) { DLOG(INFO) << "ForwardBackwardImpl: no need for apply_updates for layer # " << i - << ", skip on_delwt_wait, apply_updates, on_wtinc_ready"; + << ", skip on_delwt_wait, apply_updates, on_wtinc_ready"; continue; } @@ -113,16 +112,12 @@ Dtype MultiSolver::ForwardBackwardImpl(bool first, bool last) { callbacks_[j]->on_delwt_wait(i); } - boost::shared_ptr> layer{ net.layers()[i] }; - for (int j = 0; j < callbacks_.size(); ++j) { - callbacks_[j]->apply_updates(i); + callbacks_[j]->apply_updates(i); } - #ifdef CAFFE_PER_LAYER_TIMINGS update_time_per_layer[i] += timer.MicroSeconds(); #endif - } } diff --git a/src/caffe/multinode/multi_sync.cpp b/src/caffe/multinode/multi_sync.cpp index 0789c1423..eb6229ed4 100644 --- a/src/caffe/multinode/multi_sync.cpp +++ b/src/caffe/multinode/multi_sync.cpp @@ -44,13 +44,10 @@ namespace caffe { template MultiSync::MultiSync(shared_ptr > root_solver) : solver(boost::make_shared >(root_solver)), - snapshot_per_iters(root_solver->param().snapshot()), layers(root_solver->net()->layers()), net(root_solver->net()), net_params(root_solver->net()->learnable_params()) { root_solver->param().set_disabled_update(true); - if (!is_root) root_solver->param().clear_snapshot(); - if (!is_root) root_solver->param().set_snapshot_after_train(false); if 
(root_solver->iter() == 0) root_solver->set_iter(1); diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 545a1f00f..9ea85e973 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -44,6 +44,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "hdf5.h" +#include "boost/algorithm/string.hpp" + #include "caffe/common.hpp" #include "caffe/layer.hpp" #include "caffe/net.hpp" @@ -58,6 +60,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "caffe/test/test_caffe_main.hpp" #include "caffe/multinode/mlsl.hpp" +#include "caffe/multinode/apply_mn_param.hpp" PERFORMANCE_CREATE_MONITOR(); @@ -125,13 +128,25 @@ void Net::Init(const NetParameter& in_param) { filtered_param.set_engine("MKLDNN"); #endif engine_name_ = filtered_param.engine(); + + NetParameter& param = filtered_param; // Create a copy of filtered_param with splits added where necessary. NetParameter param_with_splits; - InsertSplits(filtered_param, ¶m_with_splits); + InsertSplits(param, ¶m_with_splits); + param = param_with_splits; + NetParameter compiled_param; // Transform Net (merge layers etc.) improve computational performance - NetParameter param; - CompileNet(param_with_splits, ¶m); + CompileNet(param, &compiled_param); + param = compiled_param; + +#ifdef USE_MLSL + NetParameter param_with_mn; + if (mn::is_multinode()) { + ApplyMultinodeParams(param, ¶m_with_mn); + param = param_with_mn; + } +#endif // Printing processed model if (Caffe::root_solver()) { @@ -142,6 +157,9 @@ void Net::Init(const NetParameter& in_param) { fflush(0); } +#ifdef USE_MLSL + int global_batch_size = -1; +#endif // Basically, build all the layers and set up their connections. 
name_ = param.name(); map blob_name_to_idx; @@ -245,7 +263,12 @@ void Net::Init(const NetParameter& in_param) { if (caffe::TRAIN == param.state().phase()) { LOG(WARNING) << "SetMinibatchSize " << batch_size; - mn::train::set_global_minibatch_size(batch_size * mn::get_nodes_count()); + if (global_batch_size < 0) { + global_batch_size = batch_size * mn::get_nodes_count(); + mn::train::set_global_minibatch_size(global_batch_size); + } else { + CHECK_EQ(global_batch_size, batch_size * mn::get_nodes_count()); + } } } #endif /* USE_MLSL */ @@ -411,7 +434,7 @@ void Net::Init(const NetParameter& in_param) { * sizeof(Dtype); int caffe_weight_size = learnable_params_[param_ids[i]]->count() * sizeof(Dtype); if (mlsl_weight_size < caffe_weight_size) - LOG(FATAL) << "InitNet: ERROR: check weight sizes for layer " << layer->type() << ", layer_id " << layer_id + LOG(FATAL) << "InitNet: ERROR: check weight sizes for layer " << layer->type() << ", layer_id " << layer_id << ", param_id " << param_ids[i] << ", MLSL weight size in bytes " << mlsl_weight_size << ", CAFFE weight size in bytes " << caffe_weight_size; @@ -592,7 +615,7 @@ void Net::CompilationRuleTwo(const NetParameter& param, // H == 0 && // I == string::npos))))) */ - if ((param.state().phase() == TEST) && + if ((param.state().phase() == TEST) && (layer_param->type().compare("Convolution") == 0) && ((layer_param->convolution_param().engine() == ConvolutionParameter_Engine_MKLDNN) || (((layer_param->convolution_param().engine() == ConvolutionParameter_Engine_DEFAULT) && @@ -1191,9 +1214,31 @@ void Net::Reshape() { template void Net::CopyTrainedLayersFrom(const NetParameter& param_inp) { NetParameter param_tmp = param_inp; - param_tmp.set_engine(engine_name_); - NetParameter param; - CompileNet(param_tmp, ¶m); + NetParameter ¶m = param_tmp; + param.set_engine(engine_name_); + NetParameter param_compiled; + CompileNet(param, ¶m_compiled); + param = param_compiled; +#ifdef USE_MLSL + NetParameter param_mn; + if 
(mn::is_multinode()) { + // set per-layer multi-node parameters before adjusting net proto + for (int i = 0; i < param.layer_size(); i++) { + LayerParameter* source_layer = param.mutable_layer(i); + const string& source_layer_name = source_layer->name(); + int target_layer_id = 0; + while (target_layer_id != layer_names_.size() && + layer_names_[target_layer_id] != source_layer_name) { + ++target_layer_id; + } + if (target_layer_id == layer_names_.size()) continue; + *source_layer->mutable_multinode() = + layers_[target_layer_id]->layer_param().multinode(); + } + ApplyMultinodeParams(param, ¶m_mn); + param = param_mn; + } +#endif int num_source_layers = param.layer_size(); for (int i = 0; i < num_source_layers; ++i) { @@ -1290,8 +1335,29 @@ void Net::CopyTrainedLayersFromHDF5(const string trained_filename) { << source_layer_name; } } +#ifdef USE_MLSL + const MultinodeLayerParameter &mn_layer_param = + layers_[target_layer_id]->layer_param().multinode(); + int num_nodes = mn_layer_param.num_nodes(); + int model_parts = mn_layer_param.model_parts(); + mn::GetCanonicalMnParam(num_nodes, model_parts); + Blob orig_blob; + vector shape = target_blobs[j]->shape(); + CHECK_GT(shape.size(), 0); + int offset = 0; + if (model_parts > 1) { + shape[0] *= model_parts; + offset = target_blobs[j]->count() * (mn::get_node_id() % model_parts); + } + orig_blob.Reshape(shape); + hdf5_load_nd_dataset(layer_hid, dataset_name.c_str(), 0, kMaxBlobAxes, + &orig_blob); + caffe_copy(target_blobs[j]->count(), orig_blob.cpu_data() + offset, + target_blobs[j]->mutable_cpu_data()); +#else hdf5_load_nd_dataset(layer_hid, dataset_name.c_str(), 0, kMaxBlobAxes, target_blobs[j].get()); +#endif } H5Gclose(layer_hid); } @@ -1309,6 +1375,14 @@ void Net::ToProto(NetParameter* param, bool write_diff) const { LayerParameter* layer_param = param->add_layer(); layers_[i]->ToProto(layer_param, write_diff); } + // TODO: Should implement the param adjustment for ToHDF5 as well + // TODO: Decompile net to BVLC 
compatibility + // DecompileNet(param); +#ifdef USE_MLSL + if (mn::is_multinode()) { + RevertMultinodeParams(param, write_diff); + } +#endif } template @@ -1328,6 +1402,9 @@ void Net::ToHDF5(const string& filename, bool write_diff) const { } for (int layer_id = 0; layer_id < layers_.size(); ++layer_id) { const LayerParameter& layer_param = layers_[layer_id]->layer_param(); +#ifdef USE_MLSL + if (layer_param.type() == "MnActivation") continue; +#endif string layer_name = layer_param.name(); hid_t layer_data_hid = H5Gcreate2(data_hid, layer_name.c_str(), H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); @@ -1345,6 +1422,48 @@ void Net::ToHDF5(const string& filename, bool write_diff) const { ostringstream dataset_name; dataset_name << param_id; const int net_param_id = param_id_vecs_[layer_id][param_id]; +#ifdef USE_MLSL + const MultinodeLayerParameter &mn_layer_param = layer_param.multinode(); + int num_nodes = mn_layer_param.num_nodes(); + int model_parts = mn_layer_param.model_parts(); + mn::GetCanonicalMnParam(num_nodes, model_parts); + Blob new_blob; + vector shape = params_[net_param_id]->shape(); + CHECK_GT(shape.size(), 0); + if (model_parts > 1) { + mn::Distribution *distrib = mn::get_distrib(num_nodes/model_parts, model_parts); + shape[0] *= model_parts; + new_blob.Reshape(shape); + distrib->allgather( + params_[net_param_id]->mutable_cpu_data(), + params_[net_param_id]->count(), + new_blob.mutable_cpu_data()); + if (write_diff) { + distrib->allgather( + params_[net_param_id]->mutable_cpu_diff(), + params_[net_param_id]->count(), + new_blob.mutable_cpu_diff()); + } + } else { + new_blob.Reshape(shape); + caffe_copy(new_blob.count(), params_[net_param_id]->cpu_data(), + new_blob.mutable_cpu_data()); + if (write_diff) { + caffe_copy(new_blob.count(), params_[net_param_id]->cpu_diff(), + new_blob.mutable_cpu_diff()); + } + } + if (param_owners_[net_param_id] == -1) { + // Only save params that own themselves + hdf5_save_nd_dataset(layer_data_hid, dataset_name.str(), + 
new_blob); + } + if (write_diff) { + // Write diffs regardless of weight-sharing + hdf5_save_nd_dataset(layer_diff_hid, dataset_name.str(), + new_blob, true); + } +#else if (param_owners_[net_param_id] == -1) { // Only save params that own themselves hdf5_save_nd_dataset(layer_data_hid, dataset_name.str(), @@ -1355,6 +1474,7 @@ void Net::ToHDF5(const string& filename, bool write_diff) const { hdf5_save_nd_dataset(layer_diff_hid, dataset_name.str(), *params_[net_param_id], true); } +#endif } H5Gclose(layer_data_hid); if (write_diff) { diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 29485dc26..cc9ee65c9 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -209,6 +209,20 @@ message NetParameter { // DEPRECATED: use 'layer' instead. repeated V1LayerParameter layers = 2; + + // Multinode settings + optional MultinodeParameter multinode = 101; +} + +message MultinodeParameter { + repeated MnModelParallelParameter model_parallel = 1; +} + +message MnModelParallelParameter { + required string layer_from = 1; + optional string layer_to = 2; + optional uint32 num_nodes = 3; // 0 means all nodes + optional uint32 model_parts = 4; // 0 or >= num_nodes, means all nodes } message MultiPhaseSolverParameter { @@ -451,7 +465,7 @@ message ParamSpec { // NOTE // Update the next available ID when you add a new LayerParameter field. 
// -// LayerParameter next available layer-specific ID: 150 (last added: engine) +// LayerParameter next available layer-specific ID: 152 (last added: mn_activation_param) message LayerParameter { optional string name = 1; // the layer name optional string type = 2; // the layer type @@ -559,6 +573,28 @@ message LayerParameter { optional WindowDataParameter window_data_param = 129; optional string engine = 149 [default = ""]; + + optional MultinodeLayerParameter multinode = 150; + optional MnActivationParameter mn_activation_param = 151; +} + +message MultinodeLayerParameter { + // 0 means all nodes + optional uint32 num_nodes = 1; + // 0 or > num_nodes, means all nodes + optional uint32 model_parts = 2 [default = 1]; +} + +message MnActivationParameter { + // 0 means all nodes + optional uint32 num_nodes_in = 1; + // 0 means all nodes + optional uint32 num_nodes_out = 2; + // 0 or > num_nodes, means all nodes + optional uint32 model_parts_in = 3 [default = 1]; + // 0 or > num_nodes, means all nodes + optional uint32 model_parts_out = 4 [default = 1]; + optional bool need_reduce = 5 [default = true]; } // Message that stores parameters used to apply transformation diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 82b26a9ca..cf8c31b47 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -54,7 +54,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "caffe/util/performance.hpp" #include "caffe/util/upgrade_proto.hpp" +#ifdef USE_MLSL #include "caffe/multinode/mlsl.hpp" +#endif namespace caffe { @@ -539,6 +541,11 @@ void Solver::Solve(const char* resume_file) { template void Solver::TestAll() { +#ifdef USE_MLSL + for (int i = 0; i < callbacks_.size(); ++i) { + callbacks_[i]->on_before_test(); + } +#endif for (int test_net_id = 0; test_net_id < test_nets_.size() && !requested_early_exit_; ++test_net_id) { @@ -550,6 +557,11 @@ void Solver::TestAll() { LOG(FATAL) << "Unknown evaluation type: " << param_.eval_type(); } } +#ifdef USE_MLSL + for (int i = 0; i < callbacks_.size(); ++i) { + callbacks_[i]->on_after_test(); + } +#endif } template @@ -767,11 +779,10 @@ void Solver::Snapshot() { CHECK(Caffe::root_solver()); #ifdef USE_MLSL - if (mn::get_node_id() != 0) { - return; + for (int i = 0; i < callbacks_.size(); ++i) { + callbacks_[i]->on_before_snapshot(); } #endif /* USE_MLSL */ - string model_filename; switch (param_.snapshot_format()) { case caffe::SolverParameter_SnapshotFormat_BINARYPROTO: @@ -785,6 +796,12 @@ void Solver::Snapshot() { } SnapshotSolverState(model_filename); + +#ifdef USE_MLSL + for (int i = 0; i < callbacks_.size(); ++i) { + callbacks_[i]->on_after_snapshot(); + } +#endif } template From 8bd0ca2c4b24c15316c6d057ce4e519611f2ddc4 Mon Sep 17 00:00:00 2001 From: "Gong, Jiong" Date: Fri, 16 Jun 2017 18:47:19 +0800 Subject: [PATCH 10/34] simplify ByPass logic --- src/caffe/layer.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/caffe/layer.cpp b/src/caffe/layer.cpp index 2a88ea099..df8f68d38 100644 --- a/src/caffe/layer.cpp +++ b/src/caffe/layer.cpp @@ -72,11 +72,7 @@ mn::Distribution & Layer::GetDistribution() { template bool Layer::Bypass(const vector*>& bottom, const vector*>& top) { - int num_nodes = layer_param_.multinode().num_nodes(); - int model_parts = layer_param_.multinode().model_parts(); - mn::GetCanonicalMnParam(num_nodes, model_parts); - int 
data_parts = num_nodes / model_parts; - return mn::get_group_id(data_parts, model_parts) > 0; + return GetDistribution().get_group_id() > 0; } template From 23cbe39205a868f38699c2a3aabee16bbea07acd Mon Sep 17 00:00:00 2001 From: linxinan Date: Sun, 18 Jun 2017 23:27:52 +0800 Subject: [PATCH 11/34] Support customized batch size and engine for cpp_classification. Change-Id: I60639cbc827bbea7a3fd17f89db3cd4e887cd402 --- .../batch_classification.cpp | 452 ++++++++++++++++++ .../cpp_classification/classification.cpp | 21 +- 2 files changed, 466 insertions(+), 7 deletions(-) create mode 100644 examples/cpp_classification/batch_classification.cpp diff --git a/examples/cpp_classification/batch_classification.cpp b/examples/cpp_classification/batch_classification.cpp new file mode 100644 index 000000000..374671baa --- /dev/null +++ b/examples/cpp_classification/batch_classification.cpp @@ -0,0 +1,452 @@ +/* + All modification made by Intel Corporation: © 2016 Intel Corporation + + All contributions by the University of California: + Copyright (c) 2014, 2015, The Regents of the University of California (Regents) + All rights reserved. + + All other contributions: + Copyright (c) 2014, 2015, the respective contributors + All rights reserved. + For the list of contributors go to https://github.com/BVLC/caffe/blob/master/CONTRIBUTORS.md + + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#ifdef USE_OPENCV +#include +#include +#include +#endif // USE_OPENCV +#include +#include +#include +#include +#include +#include +#include + +#ifdef USE_OPENCV +using namespace caffe; // NOLINT(build/namespaces) +using std::string; +using std::cout; +using std::cerr; +using std::endl; +using std::vector; + +DEFINE_string(model, "", + "Required; The model definition protocol buffer text file."); + +DEFINE_string(weights, "", + "Required; The pretrained weights."); + +DEFINE_string(input, "", + "Required; File that contain the path of input images line by line"); + +DEFINE_string(label_file, "", + "Required; The label file."); + +DEFINE_string(engine, "", + "Optional; Engine can only be CAFFE | MKL2017 | MKLDNN"); + +DEFINE_string(mean_file, "", + "Optional; The mean file used to subtract from the input image."); + +DEFINE_string(mean_value, "104,117,123", + "Optional; If specified, can be one value or can be same as image channels" + " - would subtract from the corresponding channel). 
Separated by ','."); + +DEFINE_int32(batch_size, 1, + "Optional; batch size, default 1"); + +typedef std::pair Prediction; + +class Classifier { + public: + Classifier(const string& model_file, + const string& trained_file, + const string& mean_file, + const string& mean_value, + const string& label_file, + const string& engine, + const size_t batch_size, + const size_t topN = 5 + ); + vector > ClassifyBatch(vector& imgs); + + private: + void SetMean(const string& mean_file, const string& mean_value); + + vector PredictBatch(vector& imgs); + + void WrapInputLayerBatch(vector >* input_channels_batch); + void WriteImgToInput(const vector& imgs, vector >* input_channels_batch); + void Preprocess(cv::Mat& img); + + void PreprocessBatch(vector& imgs); + + private: + shared_ptr > net_; + cv::Size input_geometry_; + int num_channels_; + cv::Mat mean_; + size_t batch_size_; + size_t topN_; + std::vector labels_; +}; + +Classifier::Classifier(const string& model_file, + const string& trained_file, + const string& mean_file, + const string& mean_value, + const string& label_file, + const string& engine, + const size_t batch_size, + const size_t topN + ) { +#ifdef CPU_ONLY + Caffe::set_mode(Caffe::CPU); +#else + Caffe::set_mode(Caffe::GPU); +#endif + + /* Load the network. */ + net_.reset(new Net(model_file, TEST, 0, NULL, NULL, engine)); + net_->CopyTrainedLayersFrom(trained_file); + + CHECK_EQ(net_->num_inputs(), 1) << "Network should have exactly one input."; + CHECK_EQ(net_->num_outputs(), 1) << "Network should have exactly one output."; + + Blob* input_layer = net_->input_blobs()[0]; + num_channels_ = input_layer->channels(); + CHECK(num_channels_ == 3 || num_channels_ == 1) + << "Input layer should have 1 or 3 channels."; + input_geometry_ = cv::Size(input_layer->width(), input_layer->height()); + + SetMean(mean_file, mean_value); + + batch_size_ = batch_size; + topN_ = topN; + + if(!label_file.empty()) { + /* Load labels. 
*/ + std::ifstream labels(label_file.c_str()); + CHECK(labels) << "Unable to open labels file " << label_file; + string line; + while (std::getline(labels, line)) + labels_.push_back(string(line)); + + Blob* output_layer = net_->output_blobs()[0]; + CHECK_EQ(labels_.size(), output_layer->channels()) + << "Number of labels is different from the output layer dimension."; + } + +} + + +static bool PairCompare(const std::pair& lhs, + const std::pair& rhs) { + return lhs.first > rhs.first; +} + +/* Return the indices of the top N values of vector v. */ +static vector Argmax(const vector& v, int N) { + vector > pairs; + for (size_t i = 0; i < v.size(); ++i) + pairs.push_back(std::make_pair(v[i], i)); + std::partial_sort(pairs.begin(), pairs.begin() + N, pairs.end(), PairCompare); + + vector result; + for (int i = 0; i < N; ++i) + result.push_back(pairs[i].second); + return result; +} + +/* Return the top N predictions. */ +vector > Classifier::ClassifyBatch(vector& imgs) { + vector output_batch = PredictBatch(imgs); + vector > predictionsBatch; + int output_channels = net_->output_blobs()[0]->channels(); + for (size_t i = 0; i < batch_size_; ++i) { + vector output(output_batch.begin() + i*output_channels, output_batch.begin()+(i+1)*output_channels); + vector maxN = Argmax(output, topN_); + vector predictions; + for (int i = 0; i < topN_; ++i) { + int idx = maxN[i]; + if(labels_.empty()) { + predictions.push_back(std::make_pair(std::to_string(idx), output[idx])); + } else{ + predictions.push_back(std::make_pair(labels_[idx], output[idx])); + } + } + predictionsBatch.push_back(predictions); + } + return predictionsBatch; +} + +/* Load the mean file in binaryproto format. 
*/ +void Classifier::SetMean(const string& mean_file, const string& mean_value) { + cv::Scalar channel_mean; + if(!mean_file.empty()) { + BlobProto blob_proto; + ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto); + + /* Convert from BlobProto to Blob */ + Blob mean_blob; + mean_blob.FromProto(blob_proto); + CHECK_EQ(mean_blob.channels(), num_channels_) + << "Number of channels of mean file doesn't match input layer."; + + /* The format of the mean file is planar 32-bit float BGR or grayscale. */ + vector channels; + float* data = mean_blob.mutable_cpu_data(); + for (int i = 0; i < num_channels_; ++i) { + /* Extract an individual channel. */ + cv::Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data); + channels.push_back(channel); + data += mean_blob.height() * mean_blob.width(); + } + + /* Merge the separate channels into a single image. */ + cv::Mat mean; + cv::merge(channels, mean); + + /* Compute the global mean pixel value and create a mean image + * filled with this value. */ + channel_mean = cv::mean(mean); + mean_ = cv::Mat(input_geometry_, mean.type(), channel_mean); + } + if (!mean_value.empty()) { + stringstream ss(mean_value); + vector values; + string item; + while (getline(ss, item, ',')) { + float value = std::atof(item.c_str()); + values.push_back(value); + } + CHECK(values.size() == 1 || values.size() == num_channels_) << + "Specify either 1 mean_value or as many as channels: " << num_channels_; + + std::vector channels; + for (int i = 0; i < num_channels_; ++i) { + /* Extract an individual channel. */ + cv::Mat channel(input_geometry_.height, input_geometry_.width, CV_32FC1, + cv::Scalar(values[i])); + channels.push_back(channel); + } + cv::merge(channels, mean_); + } +} + +vector Classifier::PredictBatch(vector& imgs) { + Blob* input_layer = net_->input_blobs()[0]; + input_layer->Reshape(batch_size_, num_channels_, + input_geometry_.height, input_geometry_.width); + /* Forward dimension change to all layers. 
*/ + net_->Reshape(); + + vector > input_channels_batch; + WrapInputLayerBatch(&input_channels_batch); + PreprocessBatch(imgs); + WriteImgToInput(imgs, &input_channels_batch); + + net_->Forward(); + + /* Copy the output layer to a vector */ + Blob* output_layer = net_->output_blobs()[0]; + const float* begin = output_layer->cpu_data(); + const float* end = begin + output_layer->channels() * batch_size_; + printf("output_layer->channels: %d\n", output_layer->channels()); + return vector(begin, end); +} + +/* Wrap the input layer of the network in separate cv::Mat objects + * (one per channel). This way we save one memcpy operation and we + * don't need to rely on cudaMemcpy2D. The last preprocessing + * operation will write the separate channels directly to the input + * layer. */ +void Classifier::WrapInputLayerBatch(vector >* input_channels_batch) { + Blob* input_layer = net_->input_blobs()[0]; + + int width = input_layer->width(); + int height = input_layer->height(); + float* input_data = input_layer->mutable_cpu_data(); + int num = input_layer->num(); + for( int j = 0; j < num; ++j) { + vector input_channels; + for (int i = 0; i < input_layer->channels(); ++i) { + cv::Mat channel(height, width, CV_32FC1, input_data); + input_channels.push_back(channel); + input_data += width * height; + } + input_channels_batch->push_back(input_channels); + } +} + +void Classifier::WriteImgToInput(const vector& imgs, + vector >* input_channels_batch) +{ + for(size_t i=0; iat(i)); + } +} + +void Classifier::PreprocessBatch(vector& imgs) { + for(size_t i=0; i loadImgBatch(vector imgNames) { + vector imgs; + for(size_t i=0; i predictions) { + /* Print the top N predictions. 
*/ + for (size_t i = 0; i < predictions.size(); ++i) { + Prediction p = predictions[i]; + cout << std::fixed << std::setprecision(4) << p.second << " - \"" + << p.first << "\"" << endl; + } +} + +void printPredictionsBatch(vector imgNames, + vector > predictionsBatch) { + for( size_t i = 0; i < predictionsBatch.size(); ++i) { + cout << "---------- "<< i + 1 <<": Prediction for " + << imgNames[i] << " ----------" << endl; + printPrediction(predictionsBatch[i]); + } +} + +vector readImgListFromPath(string file) { + vector rawImgNames; + std::ifstream input_lines(file.c_str()); + CHECK(input_lines) << "Unable to open file " << file; + string line; + while (std::getline(input_lines, line)) + rawImgNames.push_back(string(line)); + return rawImgNames; +} +int main(int argc, char** argv) { + + ::google::InitGoogleLogging(argv[0]); + +#ifndef GFLAGS_GFLAGS_H_ + namespace gflags = google; +#endif + + gflags::SetUsageMessage("Image classification.\n" + "Usage:\n" + "batch_classification \n" + "Example: ./batch_classification --model --weights --input --batch_size " + ); + gflags::ParseCommandLineFlags(&argc, &argv, true); + + + CHECK_GT(FLAGS_model.size(), 0) << "Need a model definition to score."; + CHECK_GT(FLAGS_weights.size(), 0) << "Need model weights to score."; + CHECK_GT(FLAGS_input.size(), 0) << "Need model weights to score."; + + cout<<"Use batch size: "<< FLAGS_batch_size << endl; + + if (FLAGS_mean_file.empty()) { + cout<<"Use mean value: "<< FLAGS_mean_value< rawImgNames = readImgListFromPath(FLAGS_input); + + if(rawImgNames.size() > 0 && rawImgNames.size() < FLAGS_batch_size) { + while(rawImgNames.size() < FLAGS_batch_size) { + rawImgNames.insert(rawImgNames.end(), rawImgNames.begin(), rawImgNames.end()); + } + } + + vector imgNames(rawImgNames.begin(), rawImgNames.begin() + FLAGS_batch_size); + vector imgs = loadImgBatch(rawImgNames); + + vector > predictionsBatch = classifier.ClassifyBatch(imgs); + + printPredictionsBatch(imgNames, predictionsBatch); + + 
return 0; +} + + + +#else +int main(int argc, char** argv) { + LOG(FATAL) << "This example requires OpenCV; compile with USE_OPENCV."; +} +#endif // USE_OPENCV diff --git a/examples/cpp_classification/classification.cpp b/examples/cpp_classification/classification.cpp index 145ba699d..1e69ccc91 100644 --- a/examples/cpp_classification/classification.cpp +++ b/examples/cpp_classification/classification.cpp @@ -60,7 +60,8 @@ class Classifier { Classifier(const string& model_file, const string& trained_file, const string& mean_file, - const string& label_file); + const string& label_file, + const string& engine); std::vector Classify(const cv::Mat& img, int N = 5); @@ -85,7 +86,8 @@ class Classifier { Classifier::Classifier(const string& model_file, const string& trained_file, const string& mean_file, - const string& label_file) { + const string& label_file, + const string& engine) { #ifdef CPU_ONLY Caffe::set_mode(Caffe::CPU); #else @@ -93,7 +95,7 @@ Classifier::Classifier(const string& model_file, #endif /* Load the network. 
*/ - net_.reset(new Net(model_file, TEST)); + net_.reset(new Net(model_file, TEST, 0, NULL, NULL, engine)); net_->CopyTrainedLayersFrom(trained_file); CHECK_EQ(net_->num_inputs(), 1) << "Network should have exactly one input."; @@ -264,10 +266,10 @@ void Classifier::Preprocess(const cv::Mat& img, } int main(int argc, char** argv) { - if (argc != 6) { + if (argc < 7) { std::cerr << "Usage: " << argv[0] << " deploy.prototxt network.caffemodel" - << " mean.binaryproto labels.txt img.jpg" << std::endl; + << " mean.binaryproto labels.txt img.jpg [CAFFE|MKL2017|MKLDNN]" << std::endl; return 1; } @@ -277,9 +279,14 @@ int main(int argc, char** argv) { string trained_file = argv[2]; string mean_file = argv[3]; string label_file = argv[4]; - Classifier classifier(model_file, trained_file, mean_file, label_file); + string file = argv[5]; + string engine = ""; + if (argc > 6) { + engine = argv[6]; + } + + Classifier classifier(model_file, trained_file, mean_file, label_file, engine); - string file = argv[5]; std::cout << "---------- Prediction for " << file << " ----------" << std::endl; From 8e32af11c173176e69eee8c6d8dac769351943b6 Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Mon, 19 Jun 2017 14:10:47 +0800 Subject: [PATCH 12/34] 1. Update the mkldnn version to fix the ResNet regression and improve the KNL performance. 2. Change to keep the backward compatibility of cpp_classification. 
--- examples/cpp_classification/classification.cpp | 2 +- mkldnn.commit | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/cpp_classification/classification.cpp b/examples/cpp_classification/classification.cpp index 1e69ccc91..5b8aa21e4 100644 --- a/examples/cpp_classification/classification.cpp +++ b/examples/cpp_classification/classification.cpp @@ -266,7 +266,7 @@ void Classifier::Preprocess(const cv::Mat& img, } int main(int argc, char** argv) { - if (argc < 7) { + if (argc < 6) { std::cerr << "Usage: " << argv[0] << " deploy.prototxt network.caffemodel" << " mean.binaryproto labels.txt img.jpg [CAFFE|MKL2017|MKLDNN]" << std::endl; diff --git a/mkldnn.commit b/mkldnn.commit index 3cb3ee7f7..4e6af52a3 100644 --- a/mkldnn.commit +++ b/mkldnn.commit @@ -1 +1 @@ -a7e17b753c622906f8bdc78f8510e023fc10daaf +22bf25f29369d247098968837b21f3d1bdb2336e From 98622e29d86e151af1fdafd3a9d40af09e6cb410 Mon Sep 17 00:00:00 2001 From: "Gong, Jiong" Date: Mon, 19 Jun 2017 19:26:01 +0800 Subject: [PATCH 13/34] use MLSL_DEFAULT_COLOR to replace -1 --- include/caffe/multinode/mlsl.hpp | 16 +++++++++------- src/caffe/multinode/mn_activation_layer.cpp | 3 ++- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/include/caffe/multinode/mlsl.hpp b/include/caffe/multinode/mlsl.hpp index b0851e9ee..b135e4673 100644 --- a/include/caffe/multinode/mlsl.hpp +++ b/include/caffe/multinode/mlsl.hpp @@ -46,6 +46,8 @@ namespace caffe { namespace mn { +#define MLSL_DEFAULT_COLOR -1 + inline void free(void *addr) { return MLSL::Environment::GetEnv().Free(addr); } @@ -101,12 +103,12 @@ namespace caffe { Distribution & operator = (const Distribution &) = delete; Distribution(const Distribution &) = delete; - Distribution(int dataParts, int modelParts, int dataColor = -1, int modelColor = -1, - int dataColorMax = -1, int modelColorMax = -1) : + Distribution(int dataParts, int modelParts, int dataColor = MLSL_DEFAULT_COLOR, int modelColor = MLSL_DEFAULT_COLOR, + int 
dataColorMax = MLSL_DEFAULT_COLOR, int modelColorMax = MLSL_DEFAULT_COLOR) : data_parts_(dataParts), model_parts_(modelParts), data_color_(dataColor), model_color_(modelColor), data_color_max_(dataColorMax), model_color_max_(modelColorMax) { - if (dataColor == -1 || modelColor == -1) { + if (dataColor == MLSL_DEFAULT_COLOR || modelColor == MLSL_DEFAULT_COLOR) { distrib_ = MLSL::Environment::GetEnv().CreateDistribution(dataParts, modelParts); } else { distrib_ = MLSL::Environment::GetEnv().CreateDistributionWithColors(dataColor, modelColor); @@ -197,9 +199,9 @@ namespace caffe { } private: inline bool skip_comm(MLSL::GroupType Gtype) { - if (Gtype == MLSL::GT_DATA && data_color_max_ != -1) { + if (Gtype == MLSL::GT_DATA && data_color_max_ != MLSL_DEFAULT_COLOR) { return data_color_ > data_color_max_; - } else if (Gtype == MLSL::GT_MODEL && model_color_max_ != -1) { + } else if (Gtype == MLSL::GT_MODEL && model_color_max_ != MLSL_DEFAULT_COLOR) { return model_color_ > model_color_max_; } else return get_group_id() > 0; } @@ -219,8 +221,8 @@ namespace caffe { } shared_ptr create_distrib( - int dataParts, int modelParts, int dataColor = -1, int modelColor = -1, - int dataColorMax = -1, int modelColorMax = -1); + int dataParts, int modelParts, int dataColor = MLSL_DEFAULT_COLOR, int modelColor = MLSL_DEFAULT_COLOR, + int dataColorMax = MLSL_DEFAULT_COLOR, int modelColorMax = MLSL_DEFAULT_COLOR); Distribution * get_distrib(int dataParts, int modelParts); Distribution * get_distrib(); diff --git a/src/caffe/multinode/mn_activation_layer.cpp b/src/caffe/multinode/mn_activation_layer.cpp index ad37ec191..7b4c7b8ca 100644 --- a/src/caffe/multinode/mn_activation_layer.cpp +++ b/src/caffe/multinode/mn_activation_layer.cpp @@ -79,7 +79,8 @@ void MnActivationLayer::LayerSetUp(const vector*>& bottom, << ",1), data color: " << data_color << ", data color max: " << data_parts_min-1; distrib_data_in_out_ = mn::create_distrib( - data_parts_max / data_parts_min, 1, data_color, -1, 
data_parts_min-1, -1); + data_parts_max / data_parts_min, 1, data_color, MLSL_DEFAULT_COLOR, + data_parts_min-1, MLSL_DEFAULT_COLOR); } } From ade99a1f9ff96aa64a9c7134c83c8d98e3f4ff6f Mon Sep 17 00:00:00 2001 From: "Gong, Jiong" Date: Mon, 19 Jun 2017 21:32:27 +0800 Subject: [PATCH 14/34] disable model parallelism for cross-channel LRN --- src/caffe/multinode/apply_mn_param.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/caffe/multinode/apply_mn_param.cpp b/src/caffe/multinode/apply_mn_param.cpp index 1e406dc45..a2ec34765 100644 --- a/src/caffe/multinode/apply_mn_param.cpp +++ b/src/caffe/multinode/apply_mn_param.cpp @@ -84,6 +84,13 @@ void ApplyMultinodeParams(const NetParameter& param, marked_blobs.insert(layer_param.top(j)); } net_layer_params[layer_param.name()] = model_parallel_param; + // For cross-channel LRN, we assume there is always one model part + // for simple implementation. + if (layer_param.type() == "LRN" && + layer_param.lrn_param().norm_region() == + LRNParameter_NormRegion_ACROSS_CHANNELS) { + net_layer_params[layer_param.name()].set_model_parts(1); + } } if (layer_param.name() == layer_to || layer_param.top_size() == 0) { From 178f530b39336b4e38c1ba80cf29406a6e76de04 Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Wed, 21 Jun 2017 01:03:24 +0800 Subject: [PATCH 15/34] Add the shell script to support the automatic downloading and installation of MLSL. 
--- external/mlsl/prepare_mlsl.sh | 85 +++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100755 external/mlsl/prepare_mlsl.sh diff --git a/external/mlsl/prepare_mlsl.sh b/external/mlsl/prepare_mlsl.sh new file mode 100755 index 000000000..ded650e35 --- /dev/null +++ b/external/mlsl/prepare_mlsl.sh @@ -0,0 +1,85 @@ +#!/bin/sh +# set -ex +# +# All modification made by Intel Corporation: © 2016 Intel Corporation +# +# This script is used to prepare the Intel® Machine Learning Scaling Library +# +FindLibrary() +{ +# Find all the instances of the MKL libraries present in Caffe + MLSL_LIBS=`find $1 -name libmlsl.so` + #echo "[Debug][FindLibrary function] MLSL_LIBS: $MLSL_LIBS" + + LOCALMLSL=$MLSL_LIBS + #echo "[Debug][FindLibrary function] LOCALMLSL: $LOCALMLSL" +} + +GetVersionName() +{ +VERSION_LINE=0 +if [ $1 ]; then + RAW_VERSION_LINE=`echo $1 | rev | cut -d "_" -f -1 | rev` + VERSION_LINE=`echo $RAW_VERSION_LINE | sed 's/\.//g'` +fi +if [ -z $VERSION_LINE ]; then + VERSION_LINE=0 +fi +echo $VERSION_LINE # Return Version Line +} + +# MLSL +DST=`dirname $0` +#echo "[Debug] dirname: $0" +#echo "[Debug] DST value: $DST" +ABS_DST=`readlink -f $DST` +#echo "[Debug] ABS_DST value: $ABS_DST" +VERSION_MATCH=20170014 +ARCHIVE_BASENAME=l_mlsl_p_2017.0.014.tgz +ARCHIVE_INSTALL_FOLDERNAME=l_mlsl_p_2017.0.014 +MLSL_CONTENT_DIR=`echo $ARCHIVE_BASENAME | rev | cut -d "." -f 2- | rev` +#echo "[Debug] MLSL_CONTENT_DIR value: $MLSL_CONTENT_DIR" +GITHUB_RELEASE_TAG=v2017-Preview + +MLSLURL="https://github.com/01org/MLSL/releases/download/$GITHUB_RELEASE_TAG/$ARCHIVE_BASENAME" +#echo "[Debug] MLSLROOT value: $MLSLROOT" +VERSION_LINE=`GetVersionName $MLSLROOT` +#echo "[Debug] VERSION_LINE value: $VERSION_LINE" +# Check if MLSLROOT is set if positive then set one will be used.. 
+if [ -z $MLSLROOT ] || [ $VERSION_LINE -lt $VERSION_MATCH ]; then + # ..if MLSLROOT is not set then check if we have MLSL unpacked and installed in proper version + FindLibrary $DST + #echo "[Debug] LOCALMLSL value inside if: $LOCALMLSL" + if [ -z $LOCALMLSL ]; then + echo "No MLSL unpacked and installed in proper version" + else + echo "Some verison of MLSL is unpacked and installed" + MLSL_PREVIOUS_CONTENT_DIR=`echo $LOCALMLSL | rev | cut -d "/" -f 4- | cut -d "/" -f -1 | rev` + VERSION_LINE=`GetVersionName $DST/$MLSL_PREVIOUS_CONTENT_DIR` + fi + #echo "[Debug] VERSION_LINE value inside if: $VERSION_LINE" + if [ $VERSION_LINE -lt $VERSION_MATCH ] ; then + #...If it is not then downloaded, unpacked and installed + wget --no-check-certificate -P $DST $MLSLURL -O $DST/$ARCHIVE_BASENAME + tar -xzf $DST/$ARCHIVE_BASENAME -C $DST + #echo "[Debug] PWD value: $PWD" + #install.sh did not support the relative path as the parameter + bash $DST/install.sh -s -d $ABS_DST/$ARCHIVE_INSTALL_FOLDERNAME + fi + FindLibrary $DST + #echo "[Debug] LOCALMLSL value: $LOCALMLSL" + #echo "[Debug] PWD value: $PWD" + MLSLROOT=$PWD/`echo $LOCALMLSL | sed -e 's/intel64.*$//'` + #echo "[Debug] MLSLROOT value: $MLSLROOT" +fi + +if [ -z $LOCALMLSL ] ; then +# LOCALMLSL is not set, when MLSLROOT was set manually and it should point to MLSL in correct version + FindLibrary $MLSLROOT +fi + +LIBRARIES=`basename $LOCALMLSL | sed -e 's/^.*lib//' | sed -e 's/\.so.*$//'` +#echo "[Debug] LIBRARIES value: $LIBRARIES" + +# return value to calling script (Makefile,cmake) +echo $MLSLROOT $LIBRARIES From ad7ace937107dd2f70c9637402e6457043f3eb0c Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Wed, 21 Jun 2017 02:08:32 +0800 Subject: [PATCH 16/34] 1. Update the prepare_mlsl script for return value. 2. Enable the mlsl download and installation for make and CMake. 
--- Makefile | 3 +++ cmake/Dependencies.cmake | 12 +++++++++++- external/mlsl/prepare_mlsl.sh | 7 +++---- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 7621eec6f..4d9eada0f 100644 --- a/Makefile +++ b/Makefile @@ -64,6 +64,9 @@ endif #################### MLSL #################### ifeq ($(USE_MLSL), 1) + RETURN_STRING=$(shell ./external/mlsl/prepare_mlsl.sh) + MLSL_ROOT=$(firstword $(RETURN_STRING)) + MLSL_LDFLAGS=$(lastword $(RETURN_STRING)) COMMON_FLAGS += -DUSE_MLSL=1 LIBRARIES += mlsl INCLUDE_DIRS += $(MLSL_ROOT)/intel64/include diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 040aa4394..0d27a46f4 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -96,7 +96,17 @@ endif() # ---[ MLSL if(USE_MLSL) - set(MLSL_ROOT "$ENV{MLSL_ROOT}") + #--find mlsl in external/mkl + set(script_cmd "./external/mlsl/prepare_mlsl.sh" ) + execute_process(COMMAND ${script_cmd} + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + RESULT_VARIABLE script_result + OUTPUT_VARIABLE RETURN_STRING) + separate_arguments(RETURN_STRING) + list(GET RETURN_STRING 0 MLSL_ROOT_DIR) + list(GET RETURN_STRING 1 MLSL_LIBRARIES) + set(MLSL_ROOT "${MLSL_ROOT_DIR}") + #set(MLSL_ROOT "$ENV{MLSL_ROOT}") if(NOT MLSL_ROOT) message(FATAL_ERROR "Unable to find MLSL package installation directory!") endif() diff --git a/external/mlsl/prepare_mlsl.sh b/external/mlsl/prepare_mlsl.sh index ded650e35..c061eb1f0 100755 --- a/external/mlsl/prepare_mlsl.sh +++ b/external/mlsl/prepare_mlsl.sh @@ -50,10 +50,9 @@ if [ -z $MLSLROOT ] || [ $VERSION_LINE -lt $VERSION_MATCH ]; then # ..if MLSLROOT is not set then check if we have MLSL unpacked and installed in proper version FindLibrary $DST #echo "[Debug] LOCALMLSL value inside if: $LOCALMLSL" - if [ -z $LOCALMLSL ]; then - echo "No MLSL unpacked and installed in proper version" - else - echo "Some verison of MLSL is unpacked and installed" + if [ $LOCALMLSL ]; then + #in order to return value to 
calling script (Makefile,cmake), cannot print other info + #echo "[Debug] Some version of MLSL is unpacked and installed" MLSL_PREVIOUS_CONTENT_DIR=`echo $LOCALMLSL | rev | cut -d "/" -f 4- | cut -d "/" -f -1 | rev` VERSION_LINE=`GetVersionName $DST/$MLSL_PREVIOUS_CONTENT_DIR` fi From 35ddee983636c373126d9f58511b28c41ec42a79 Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Wed, 21 Jun 2017 11:44:31 +0800 Subject: [PATCH 17/34] Update the prepare_mlsl shell script to handle the case that MLSLROOT is set with no proper version. --- external/mlsl/prepare_mlsl.sh | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/external/mlsl/prepare_mlsl.sh b/external/mlsl/prepare_mlsl.sh index c061eb1f0..aa9752b0e 100755 --- a/external/mlsl/prepare_mlsl.sh +++ b/external/mlsl/prepare_mlsl.sh @@ -57,27 +57,32 @@ if [ -z $MLSLROOT ] || [ $VERSION_LINE -lt $VERSION_MATCH ]; then VERSION_LINE=`GetVersionName $DST/$MLSL_PREVIOUS_CONTENT_DIR` fi #echo "[Debug] VERSION_LINE value inside if: $VERSION_LINE" - if [ $VERSION_LINE -lt $VERSION_MATCH ] ; then + #if MLSLROOT is set, but version is not given, not to download our own version + if [ -z $MLSLROOT ] && [ $VERSION_LINE -lt $VERSION_MATCH ] ; then #...If it is not then downloaded, unpacked and installed wget --no-check-certificate -P $DST $MLSLURL -O $DST/$ARCHIVE_BASENAME tar -xzf $DST/$ARCHIVE_BASENAME -C $DST #echo "[Debug] PWD value: $PWD" #install.sh did not support the relative path as the parameter bash $DST/install.sh -s -d $ABS_DST/$ARCHIVE_INSTALL_FOLDERNAME + + #do not change the value of MLSLROOT if MLSLROOT is set, but version is not given + FindLibrary $DST + #echo "[Debug] LOCALMLSL value: $LOCALMLSL" + #echo "[Debug] PWD value: $PWD" + MLSLROOT=$PWD/`echo $LOCALMLSL | sed -e 's/intel64.*$//'` fi - FindLibrary $DST - #echo "[Debug] LOCALMLSL value: $LOCALMLSL" - #echo "[Debug] PWD value: $PWD" - MLSLROOT=$PWD/`echo $LOCALMLSL | sed -e 's/intel64.*$//'` #echo "[Debug] MLSLROOT value: 
$MLSLROOT" fi if [ -z $LOCALMLSL ] ; then # LOCALMLSL is not set, when MLSLROOT was set manually and it should point to MLSL in correct version FindLibrary $MLSLROOT + LIBRARIES="" +else + LIBRARIES=`basename $LOCALMLSL | sed -e 's/^.*lib//' | sed -e 's/\.so.*$//'` fi -LIBRARIES=`basename $LOCALMLSL | sed -e 's/^.*lib//' | sed -e 's/\.so.*$//'` #echo "[Debug] LIBRARIES value: $LIBRARIES" # return value to calling script (Makefile,cmake) From 70aaf4b584852b536f0aa0ac1b950408ae3d0a64 Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Thu, 22 Jun 2017 02:12:52 +0800 Subject: [PATCH 18/34] Update the prepare_mlsl.sh for more complex situations of customized MLSLROOT. --- external/mlsl/prepare_mlsl.sh | 36 ++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/external/mlsl/prepare_mlsl.sh b/external/mlsl/prepare_mlsl.sh index aa9752b0e..e5e01e559 100755 --- a/external/mlsl/prepare_mlsl.sh +++ b/external/mlsl/prepare_mlsl.sh @@ -57,32 +57,34 @@ if [ -z $MLSLROOT ] || [ $VERSION_LINE -lt $VERSION_MATCH ]; then VERSION_LINE=`GetVersionName $DST/$MLSL_PREVIOUS_CONTENT_DIR` fi #echo "[Debug] VERSION_LINE value inside if: $VERSION_LINE" - #if MLSLROOT is set, but version is not given, not to download our own version - if [ -z $MLSLROOT ] && [ $VERSION_LINE -lt $VERSION_MATCH ] ; then - #...If it is not then downloaded, unpacked and installed - wget --no-check-certificate -P $DST $MLSLURL -O $DST/$ARCHIVE_BASENAME - tar -xzf $DST/$ARCHIVE_BASENAME -C $DST - #echo "[Debug] PWD value: $PWD" - #install.sh did not support the relative path as the parameter - bash $DST/install.sh -s -d $ABS_DST/$ARCHIVE_INSTALL_FOLDERNAME - + + #if MLSLROOT is not set + if [ -z $MLSLROOT ] ; then + #if version is not given, or the version is lower than expected version + if [ $VERSION_LINE -lt $VERSION_MATCH ] ; then + #Then downloaded, unpacked and installed + wget --no-check-certificate -P $DST $MLSLURL -O $DST/$ARCHIVE_BASENAME + tar -xzf 
$DST/$ARCHIVE_BASENAME -C $DST + #echo "[Debug] PWD value: $PWD" + #install.sh did not support the relative path as the parameter + bash $DST/install.sh -s -d $ABS_DST/$ARCHIVE_INSTALL_FOLDERNAME + fi + #else: version is just our expected version, no need to download again, but need to set the MLSLROOT #do not change the value of MLSLROOT if MLSLROOT is set, but version is not given FindLibrary $DST #echo "[Debug] LOCALMLSL value: $LOCALMLSL" #echo "[Debug] PWD value: $PWD" MLSLROOT=$PWD/`echo $LOCALMLSL | sed -e 's/intel64.*$//'` + else + #if MLSLROOT is set, but version is not given, or the version is lower than expected version + #not to download our own version, and just use mlsl as the return value of LIBRARIES + LIBRARIES="mlsl" fi #echo "[Debug] MLSLROOT value: $MLSLROOT" fi -if [ -z $LOCALMLSL ] ; then -# LOCALMLSL is not set, when MLSLROOT was set manually and it should point to MLSL in correct version - FindLibrary $MLSLROOT - LIBRARIES="" -else - LIBRARIES=`basename $LOCALMLSL | sed -e 's/^.*lib//' | sed -e 's/\.so.*$//'` -fi - +#The simplest implementation of LIBRARIES return value +LIBRARIES="mlsl" #echo "[Debug] LIBRARIES value: $LIBRARIES" # return value to calling script (Makefile,cmake) From 77cddabdda8c69b2c5de643119e5f2e4e95cd63b Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Thu, 22 Jun 2017 13:42:51 +0800 Subject: [PATCH 19/34] Fix the asum_data() logic of SYNCED_PRV, fix ICL-74. 
--- src/caffe/blob.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/caffe/blob.cpp b/src/caffe/blob.cpp index 2160cf24e..96a3f2341 100644 --- a/src/caffe/blob.cpp +++ b/src/caffe/blob.cpp @@ -318,8 +318,15 @@ Dtype Blob::asum_data() const { if (!data_) { return 0; } switch (data_->head()) { case SyncedMemory::SYNCED_PRV: + { + const Dtype* prv_ptr = prv_data(); + if (prv_ptr == NULL) + return caffe_cpu_asum(count_, cpu_data()); + else + return caffe_cpu_asum(prv_data_count(), prv_data()); + } case SyncedMemory::HEAD_AT_PRV: - return caffe_cpu_asum( prv_data_count(), prv_data()); + return caffe_cpu_asum(prv_data_count(), prv_data()); case SyncedMemory::HEAD_AT_CPU: return caffe_cpu_asum(count_, cpu_data()); case SyncedMemory::HEAD_AT_GPU: From 9ea638701762e233ee390145c0fbc2148c463cc1 Mon Sep 17 00:00:00 2001 From: "Shen, Haihao" Date: Thu, 22 Jun 2017 16:32:19 +0800 Subject: [PATCH 20/34] Fix batch normalization global status checker; add optimization for batch size 1; correct data type for 3D convolution --- include/caffe/layers/base_conv_layer.hpp | 4 +- include/caffe/util/math_functions.hpp | 48 +++--- src/caffe/layers/base_conv_layer.cpp | 42 +++--- src/caffe/layers/concat_layer.cpp | 2 +- src/caffe/layers/deconv_layer.cpp | 17 ++- src/caffe/layers/mkl_batch_norm_layer.cpp | 7 +- src/caffe/util/math_functions.cpp | 176 +++++++++++----------- 7 files changed, 155 insertions(+), 141 deletions(-) diff --git a/include/caffe/layers/base_conv_layer.hpp b/include/caffe/layers/base_conv_layer.hpp index 11236681f..00a819920 100755 --- a/include/caffe/layers/base_conv_layer.hpp +++ b/include/caffe/layers/base_conv_layer.hpp @@ -226,8 +226,8 @@ class BaseConvolutionLayer : public Layer { int conv_in_channels_; int conv_out_spatial_dim_; int kernel_dim_; - int col_offset_; - int output_offset_; + size_t col_offset_; + size_t output_offset_; Blob col_buffer_; Blob bias_multiplier_; diff --git a/include/caffe/util/math_functions.hpp 
b/include/caffe/util/math_functions.hpp index 75230a0c8..47328eced 100644 --- a/include/caffe/util/math_functions.hpp +++ b/include/caffe/util/math_functions.hpp @@ -63,49 +63,49 @@ void caffe_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N, Dtype* y); template -void caffe_axpy(const int N, const Dtype alpha, const Dtype* X, +void caffe_axpy(const long N, const Dtype alpha, const Dtype* X, Dtype* Y); template -void caffe_cpu_axpby(const int N, const Dtype alpha, const Dtype* X, +void caffe_cpu_axpby(const long N, const Dtype alpha, const Dtype* X, const Dtype beta, Dtype* Y); template -void caffe_copy(const int N, const Dtype *X, Dtype *Y); +void caffe_copy(const size_t N, const Dtype *X, Dtype *Y); template -void caffe_cpu_copy(const int N, const Dtype* X, Dtype* Y); +void caffe_cpu_copy(const size_t N, const Dtype* X, Dtype* Y); template -void caffe_set(const int N, const Dtype alpha, Dtype *X); +void caffe_set(const size_t N, const Dtype alpha, Dtype *X); inline void caffe_memset(const size_t N, const int alpha, void* X) { memset(X, alpha, N); // NOLINT(caffe/alt_fn) } template -void caffe_add_scalar(const int N, const Dtype alpha, Dtype *X); +void caffe_add_scalar(const long N, const Dtype alpha, Dtype *X); template -void caffe_scal(const int N, const Dtype alpha, Dtype *X); +void caffe_scal(const long N, const Dtype alpha, Dtype *X); template -void caffe_sqr(const int N, const Dtype* a, Dtype* y); +void caffe_sqr(const long N, const Dtype* a, Dtype* y); template -void caffe_add(const int N, const Dtype* a, const Dtype* b, Dtype* y); +void caffe_add(const long N, const Dtype* a, const Dtype* b, Dtype* y); template -void caffe_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y); +void caffe_sub(const long N, const Dtype* a, const Dtype* b, Dtype* y); template -void caffe_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y); +void caffe_mul(const long N, const Dtype* a, const Dtype* b, Dtype* y); template -void caffe_div(const int N, 
const Dtype* a, const Dtype* b, Dtype* y); +void caffe_div(const long N, const Dtype* a, const Dtype* b, Dtype* y); template -void caffe_powx(const int n, const Dtype* a, const Dtype b, Dtype* y); +void caffe_powx(const long n, const Dtype* a, const Dtype b, Dtype* y); unsigned int caffe_rng_rand(); @@ -113,37 +113,37 @@ template Dtype caffe_nextafter(const Dtype b); template -void caffe_rng_uniform(const int n, const Dtype a, const Dtype b, Dtype* r); +void caffe_rng_uniform(const long n, const Dtype a, const Dtype b, Dtype* r); template -void caffe_rng_gaussian(const int n, const Dtype mu, const Dtype sigma, +void caffe_rng_gaussian(const long n, const Dtype mu, const Dtype sigma, Dtype* r); template -void caffe_rng_bernoulli(const int n, const Dtype p, int* r); +void caffe_rng_bernoulli(const long n, const Dtype p, int* r); template -void caffe_rng_bernoulli(const int n, const Dtype p, unsigned int* r); +void caffe_rng_bernoulli(const long n, const Dtype p, unsigned int* r); template -void caffe_exp(const int n, const Dtype* a, Dtype* y); +void caffe_exp(const long n, const Dtype* a, Dtype* y); template -void caffe_log(const int n, const Dtype* a, Dtype* y); +void caffe_log(const long n, const Dtype* a, Dtype* y); template -void caffe_abs(const int n, const Dtype* a, Dtype* y); +void caffe_abs(const long n, const Dtype* a, Dtype* y); template -Dtype caffe_cpu_dot(const int n, const Dtype* x, const Dtype* y); +Dtype caffe_cpu_dot(const long n, const Dtype* x, const Dtype* y); template -Dtype caffe_cpu_strided_dot(const int n, const Dtype* x, const int incx, +Dtype caffe_cpu_strided_dot(const long n, const Dtype* x, const int incx, const Dtype* y, const int incy); // Returns the sum of the absolute values of the elements of vector x template -Dtype caffe_cpu_asum(const int n, const Dtype* x); +Dtype caffe_cpu_asum(const long n, const Dtype* x); // the branchless, type-safe version from // 
http://stackoverflow.com/questions/1903954/is-there-a-standard-sign-function-signum-sgn-in-c-c @@ -180,7 +180,7 @@ DEFINE_CAFFE_CPU_UNARY_FUNC(sgnbit, \ DEFINE_CAFFE_CPU_UNARY_FUNC(fabs, y[i] = std::fabs(x[i])); template -void caffe_cpu_scale(const int n, const Dtype alpha, const Dtype *x, Dtype* y); +void caffe_cpu_scale(const long n, const Dtype alpha, const Dtype *x, Dtype* y); #ifndef CPU_ONLY // GPU diff --git a/src/caffe/layers/base_conv_layer.cpp b/src/caffe/layers/base_conv_layer.cpp index daed130aa..205be870c 100644 --- a/src/caffe/layers/base_conv_layer.cpp +++ b/src/caffe/layers/base_conv_layer.cpp @@ -337,7 +337,7 @@ void BaseConvolutionLayer::forward_cpu_gemm(const Dtype* input, } tid = tid % num_of_threads_; // just to be sure #endif - int col_data_buffer_size = col_buffer_mt_.size()/num_of_threads_; + size_t col_data_buffer_size = col_buffer_mt_.size()/num_of_threads_; Dtype* col_buff = const_cast(input); if (!is_1x1_) { @@ -376,7 +376,7 @@ void BaseConvolutionLayer::backward_cpu_gemm(const Dtype* output, } tid = tid % num_of_threads_; // just to be sure #endif - int col_data_buffer_size = col_buffer_mt_.size()/num_of_threads_; + size_t col_data_buffer_size = col_buffer_mt_.size()/num_of_threads_; Dtype* col_buff = & col_buffer_mt_[ tid* col_data_buffer_size]; if (is_1x1_) { @@ -396,7 +396,7 @@ void BaseConvolutionLayer::backward_cpu_gemm(const Dtype* output, template void BaseConvolutionLayer::clear_weight_mt(void) { - unsigned int weight_diff_size = weight_diff_mt_.size() / num_of_threads_; + size_t weight_diff_size = weight_diff_mt_.size() / num_of_threads_; caffe_memset(num_of_threads_*weight_diff_size*sizeof(Dtype), 0., &weight_diff_mt_[0]); @@ -405,24 +405,24 @@ void BaseConvolutionLayer::clear_weight_mt(void) { template void BaseConvolutionLayer::sum_weight_mt(Dtype* weight_diff) { - unsigned int weight_diff_size = weight_diff_mt_.size() / num_of_threads_; - unsigned int col_per_thread = weight_diff_size/num_of_threads_; + size_t 
weight_diff_size = weight_diff_mt_.size() / num_of_threads_; + size_t col_per_thread = weight_diff_size/num_of_threads_; int tid = 0; #ifdef _OPENMP if (omp_in_parallel()) { tid = omp_get_thread_num(); } #endif - for (unsigned int j = 0; j < col_per_thread; ++j) { - for (unsigned int t = 0; t < num_of_threads_ ; ++t) { + for (size_t j = 0; j < col_per_thread; ++j) { + for (size_t t = 0; t < num_of_threads_ ; ++t) { weight_diff[tid*col_per_thread + j] += weight_diff_mt_[t*weight_diff_size + tid*col_per_thread + j]; } } - unsigned int j = col_per_thread*num_of_threads_ + tid; + size_t j = col_per_thread*num_of_threads_ + tid; if (j < weight_diff_size) { - for (unsigned int t = 0; t < num_of_threads_ ; ++t) { + for (size_t t = 0; t < num_of_threads_ ; ++t) { weight_diff[j] += weight_diff_mt_[t * weight_diff_size + j]; } } @@ -433,22 +433,26 @@ void BaseConvolutionLayer::weight_cpu_gemm(const Dtype* input, const Dtype* output, Dtype* weights) { int tid = 0; #ifdef _OPENMP - tid = omp_get_thread_num(); - if (tid >= num_of_threads_) { - LOG(FATAL) << "ConvLayer::weights_cpu_gemm: omp_thread_num() =" << tid - << " > OMP_num_THREADS = " << num_of_threads_; + Dtype* weight_diff_data = NULL; + if (num_of_threads_ > 1) { + tid = omp_get_thread_num(); + if (tid >= num_of_threads_) { + LOG(FATAL) << "ConvLayer::weights_cpu_gemm: omp_thread_num() =" << tid + << " > OMP_num_THREADS = " << num_of_threads_; + } + tid = tid % num_of_threads_; // just to be sure + weight_diff_data = &weight_diff_mt_[tid * (weight_diff_mt_.size() / num_of_threads_)]; + } else { + weight_diff_data = weights; } - tid = tid % num_of_threads_; // just to be sure - Dtype* weight_diff_data = - & weight_diff_mt_[tid * (weight_diff_mt_.size()/num_of_threads_)]; #else Dtype* weight_diff_data = weights; #endif Dtype* col_buff = const_cast(input); if (!is_1x1_) { - int col_data_buffer_size = col_buffer_mt_.size()/num_of_threads_; - col_buff = & col_buffer_mt_[ tid* col_data_buffer_size]; + size_t 
col_data_buffer_size = col_buffer_mt_.size() / num_of_threads_; + col_buff = &col_buffer_mt_[tid * col_data_buffer_size]; conv_im2col_cpu(input, col_buff); } for (int g = 0; g < group_; ++g) { @@ -459,8 +463,6 @@ void BaseConvolutionLayer::weight_cpu_gemm(const Dtype* input, } } - - template void BaseConvolutionLayer::backward_cpu_bias(Dtype* bias, const Dtype* input) { diff --git a/src/caffe/layers/concat_layer.cpp b/src/caffe/layers/concat_layer.cpp index 8a169864b..a252849dd 100644 --- a/src/caffe/layers/concat_layer.cpp +++ b/src/caffe/layers/concat_layer.cpp @@ -107,7 +107,7 @@ void ConcatLayer::Forward_cpu(const vector*>& bottom, const int offset_value = offset_concat_axis; offset_concat_axis += bottom_concat_axis; #ifdef _OPENMP - #pragma omp parallel for + #pragma omp parallel for if(num_concats_ > 1) #endif for (int n = 0; n < num_concats_; ++n) { caffe_copy(bottom_concat_axis * concat_input_size_, diff --git a/src/caffe/layers/deconv_layer.cpp b/src/caffe/layers/deconv_layer.cpp index d212fa035..87e8df7ed 100644 --- a/src/caffe/layers/deconv_layer.cpp +++ b/src/caffe/layers/deconv_layer.cpp @@ -69,7 +69,11 @@ void DeconvolutionLayer::Forward_cpu(const vector*>& bottom, const Dtype* bottom_data = bottom[i]->cpu_data(); Dtype* top_data = top[i]->mutable_cpu_data(); #ifdef _OPENMP -# pragma omp parallel for num_threads(this->num_of_threads_) + #pragma omp parallel if(this->num_of_threads_ > 1) num_threads(this->num_of_threads_) +#endif + { +#ifdef _OPENMP + #pragma omp for #endif for (int n = 0; n < this->num_; ++n) { this->backward_cpu_gemm(bottom_data + n * this->bottom_dim_, weight, @@ -79,6 +83,7 @@ void DeconvolutionLayer::Forward_cpu(const vector*>& bottom, this->forward_cpu_bias(top_data + n * this->top_dim_, bias); } } + } } } @@ -102,8 +107,10 @@ void DeconvolutionLayer::Backward_cpu(const vector*>& top, if (this->param_propagate_down_[0] || propagate_down[i]) { #ifdef _OPENMP - this->clear_weight_mt(); - #pragma omp parallel 
num_threads(this->num_of_threads_) + if (this->num_of_threads_ > 1) { + this->clear_weight_mt(); + } + #pragma omp parallel if(this->num_of_threads_ > 1) num_threads(this->num_of_threads_) #endif { #ifdef _OPENMP @@ -126,7 +133,9 @@ void DeconvolutionLayer::Backward_cpu(const vector*>& top, } } #ifdef _OPENMP - this->sum_weight_mt(weight_diff); + if (this->num_of_threads_ > 1) { + this->sum_weight_mt(weight_diff); + } #endif } } diff --git a/src/caffe/layers/mkl_batch_norm_layer.cpp b/src/caffe/layers/mkl_batch_norm_layer.cpp index b2e86830f..8a1e44ab8 100755 --- a/src/caffe/layers/mkl_batch_norm_layer.cpp +++ b/src/caffe/layers/mkl_batch_norm_layer.cpp @@ -66,12 +66,13 @@ void MKLBatchNormLayer::Init(const vector*>& bottom, eps_ = this->layer_param_.batch_norm_param().eps(); use_weight_bias_ = this->layer_param_.batch_norm_param().use_weight_bias(); bias_term_ = this->layer_param_.batch_norm_param().bias_term(); - use_global_stats_ = this->layer_param_.batch_norm_param().use_global_stats(); + + use_global_stats_ = this->phase_ == TEST; + if (this->layer_param_.batch_norm_param().has_use_global_stats()) + use_global_stats_ = this->layer_param_.batch_norm_param().use_global_stats(); CHECK(use_weight_bias_) << "BatchNorm without scaling have not supported yet"; - use_global_stats_ = this->phase_ == TEST; - size_t dim = 4, sizes[4], strides[4]; channels_ = bottom[0]->channels(); diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp index a777209dc..3052c09a2 100644 --- a/src/caffe/util/math_functions.cpp +++ b/src/caffe/util/math_functions.cpp @@ -94,15 +94,15 @@ void caffe_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, } template <> -void caffe_axpy(const int N, const float alpha, const float* X, +void caffe_axpy(const long N, const float alpha, const float* X, float* Y) { cblas_saxpy(N, alpha, X, 1, Y, 1); } template <> -void caffe_axpy(const int N, const double alpha, const double* X, +void caffe_axpy(const long N, const double 
alpha, const double* X, double* Y) { cblas_daxpy(N, alpha, X, 1, Y, 1); } template -void caffe_set(const int N, const Dtype alpha, Dtype* Y) { +void caffe_set(const size_t N, const Dtype alpha, Dtype* Y) { // If we are executing parallel region already then do not start another one // if also number of data to be processed is smaller than arbitrary: // threashold 12*4 cachelines per thread then no parallelization is to be made @@ -126,7 +126,7 @@ void caffe_set(const int N, const Dtype alpha, Dtype* Y) { if (run_parallel) { #pragma omp parallel for - for (int i = 0; i < N; ++i) { + for (size_t i = 0; i < N; ++i) { Y[i] = alpha; } @@ -142,28 +142,28 @@ void caffe_set(const int N, const Dtype alpha, Dtype* Y) { } } -template void caffe_set(const int N, const char alpha, char* Y); -template void caffe_set(const int N, const int alpha, int* Y); -template void caffe_set(const int N, const float alpha, float* Y); -template void caffe_set(const int N, const double alpha, double* Y); -template void caffe_set(const int N, const size_t alpha, size_t* Y); +template void caffe_set(const size_t N, const char alpha, char* Y); +template void caffe_set(const size_t N, const int alpha, int* Y); +template void caffe_set(const size_t N, const float alpha, float* Y); +template void caffe_set(const size_t N, const double alpha, double* Y); +template void caffe_set(const size_t N, const size_t alpha, size_t* Y); template <> -void caffe_add_scalar(const int N, const float alpha, float* Y) { - for (int i = 0; i < N; ++i) { +void caffe_add_scalar(const long N, const float alpha, float* Y) { + for (long i = 0; i < N; ++i) { Y[i] += alpha; } } template <> -void caffe_add_scalar(const int N, const double alpha, double* Y) { - for (int i = 0; i < N; ++i) { +void caffe_add_scalar(const long N, const double alpha, double* Y) { + for (long i = 0; i < N; ++i) { Y[i] += alpha; } } template -void caffe_cpu_copy(const int N, const Dtype* X, Dtype* Y) { +void caffe_cpu_copy(const size_t N, const 
Dtype* X, Dtype* Y) { if (X == Y) return; #ifdef _OPENMP @@ -176,12 +176,12 @@ void caffe_cpu_copy(const int N, const Dtype* X, Dtype* Y) { (caffe::cpu::OpenMpManager::isMajorThread(boost::this_thread::get_id())); if (run_parallel) { - const int block_mem_size = 256*1024; + const int block_mem_size = 256 * 1024; const int block_size = block_mem_size / sizeof(Dtype); #pragma omp parallel for - for (int i = 0; i < N; i += block_size) + for (size_t i = 0; i < N; i += block_size) memcpy(Y + i, X + i, - (i + block_size > N) ? (N-i)*sizeof(Dtype): block_mem_size); + (i + block_size > N) ? (N - i) * sizeof(Dtype) : block_mem_size); return; } @@ -190,14 +190,14 @@ void caffe_cpu_copy(const int N, const Dtype* X, Dtype* Y) { memcpy(Y, X, sizeof(Dtype) * N); // NOLINT(caffe/alt_fn) } -template void caffe_cpu_copy(const int N, const int* X, int* Y); -template void caffe_cpu_copy(const int N, const unsigned int* X, +template void caffe_cpu_copy(const size_t N, const int* X, int* Y); +template void caffe_cpu_copy(const size_t N, const unsigned int* X, unsigned int* Y); -template void caffe_cpu_copy(const int N, const float* X, float* Y); -template void caffe_cpu_copy(const int N, const double* X, double* Y); +template void caffe_cpu_copy(const size_t N, const float* X, float* Y); +template void caffe_cpu_copy(const size_t N, const double* X, double* Y); template -void caffe_copy(const int N, const Dtype* X, Dtype* Y) { +void caffe_copy(const size_t N, const Dtype* X, Dtype* Y) { if (X != Y) { #ifndef CPU_ONLY if ( @@ -218,142 +218,142 @@ void caffe_copy(const int N, const Dtype* X, Dtype* Y) { } } -template void caffe_copy(const int N, const bool* X, bool* Y); -template void caffe_copy(const int N, const int* X, int* Y); -template void caffe_copy(const int N, const unsigned int* X, +template void caffe_copy(const size_t N, const bool* X, bool* Y); +template void caffe_copy(const size_t N, const int* X, int* Y); +template void caffe_copy(const size_t N, const unsigned int* X, 
unsigned int* Y); -template void caffe_copy(const int N, const float* X, float* Y); -template void caffe_copy(const int N, const double* X, double* Y); -template void caffe_copy(const int N, const char* X, char* Y); -template void caffe_copy(const int N, const size_t* X, size_t* Y); +template void caffe_copy(const size_t N, const float* X, float* Y); +template void caffe_copy(const size_t N, const double* X, double* Y); +template void caffe_copy(const size_t N, const char* X, char* Y); +template void caffe_copy(const size_t N, const size_t* X, size_t* Y); template <> -void caffe_scal(const int N, const float alpha, float *X) { +void caffe_scal(const long N, const float alpha, float *X) { cblas_sscal(N, alpha, X, 1); } template <> -void caffe_scal(const int N, const double alpha, double *X) { +void caffe_scal(const long N, const double alpha, double *X) { cblas_dscal(N, alpha, X, 1); } template <> -void caffe_scal(const int N, const size_t alpha, size_t *X) { +void caffe_scal(const long N, const size_t alpha, size_t *X) { } template <> -void caffe_cpu_axpby(const int N, const float alpha, const float* X, +void caffe_cpu_axpby(const long N, const float alpha, const float* X, const float beta, float* Y) { cblas_saxpby(N, alpha, X, 1, beta, Y, 1); } template <> -void caffe_cpu_axpby(const int N, const double alpha, const double* X, +void caffe_cpu_axpby(const long N, const double alpha, const double* X, const double beta, double* Y) { cblas_daxpby(N, alpha, X, 1, beta, Y, 1); } template <> -void caffe_axpy(const int N, const size_t alpha, const size_t* X, +void caffe_axpy(const long N, const size_t alpha, const size_t* X, size_t* Y) { } template <> -void caffe_add(const int n, const float* a, const float* b, +void caffe_add(const long n, const float* a, const float* b, float* y) { vsAdd(n, a, b, y); } template <> -void caffe_add(const int n, const double* a, const double* b, +void caffe_add(const long n, const double* a, const double* b, double* y) { vdAdd(n, a, b, y); 
} template <> -void caffe_sub(const int n, const float* a, const float* b, +void caffe_sub(const long n, const float* a, const float* b, float* y) { vsSub(n, a, b, y); } template <> -void caffe_sub(const int n, const double* a, const double* b, +void caffe_sub(const long n, const double* a, const double* b, double* y) { vdSub(n, a, b, y); } template <> -void caffe_mul(const int n, const float* a, const float* b, +void caffe_mul(const long n, const float* a, const float* b, float* y) { vsMul(n, a, b, y); } template <> -void caffe_mul(const int n, const double* a, const double* b, +void caffe_mul(const long n, const double* a, const double* b, double* y) { vdMul(n, a, b, y); } template <> -void caffe_div(const int n, const float* a, const float* b, +void caffe_div(const long n, const float* a, const float* b, float* y) { vsDiv(n, a, b, y); } template <> -void caffe_div(const int n, const double* a, const double* b, +void caffe_div(const long n, const double* a, const double* b, double* y) { vdDiv(n, a, b, y); } template <> -void caffe_powx(const int n, const float* a, const float b, +void caffe_powx(const long n, const float* a, const float b, float* y) { vsPowx(n, a, b, y); } template <> -void caffe_powx(const int n, const double* a, const double b, +void caffe_powx(const long n, const double* a, const double b, double* y) { vdPowx(n, a, b, y); } template <> -void caffe_sqr(const int n, const float* a, float* y) { +void caffe_sqr(const long n, const float* a, float* y) { vsSqr(n, a, y); } template <> -void caffe_sqr(const int n, const double* a, double* y) { +void caffe_sqr(const long n, const double* a, double* y) { vdSqr(n, a, y); } template <> -void caffe_exp(const int n, const float* a, float* y) { +void caffe_exp(const long n, const float* a, float* y) { vsExp(n, a, y); } template <> -void caffe_exp(const int n, const double* a, double* y) { +void caffe_exp(const long n, const double* a, double* y) { vdExp(n, a, y); } template <> -void caffe_log(const int n, 
const float* a, float* y) { +void caffe_log(const long n, const float* a, float* y) { vsLn(n, a, y); } template <> -void caffe_log(const int n, const double* a, double* y) { +void caffe_log(const long n, const double* a, double* y) { vdLn(n, a, y); } template <> -void caffe_abs(const int n, const float* a, float* y) { +void caffe_abs(const long n, const float* a, float* y) { vsAbs(n, a, y); } template <> -void caffe_abs(const int n, const double* a, double* y) { +void caffe_abs(const long n, const double* a, double* y) { vdAbs(n, a, y); } @@ -378,28 +378,28 @@ template double caffe_nextafter(const double b); template -void caffe_rng_uniform(const int n, const Dtype a, const Dtype b, Dtype* r) { +void caffe_rng_uniform(const long n, const Dtype a, const Dtype b, Dtype* r) { CHECK_GE(n, 0); CHECK(r); CHECK_LE(a, b); boost::uniform_real random_distribution(a, caffe_nextafter(b)); boost::variate_generator > variate_generator(caffe_rng(), random_distribution); - for (int i = 0; i < n; ++i) { + for (long i = 0; i < n; ++i) { r[i] = variate_generator(); } } template -void caffe_rng_uniform(const int n, const float a, const float b, +void caffe_rng_uniform(const long n, const float a, const float b, float* r); template -void caffe_rng_uniform(const int n, const double a, const double b, +void caffe_rng_uniform(const long n, const double a, const double b, double* r); template -void caffe_rng_gaussian(const int n, const Dtype a, +void caffe_rng_gaussian(const long n, const Dtype a, const Dtype sigma, Dtype* r) { CHECK_GE(n, 0); CHECK(r); @@ -407,21 +407,21 @@ void caffe_rng_gaussian(const int n, const Dtype a, boost::normal_distribution random_distribution(a, sigma); boost::variate_generator > variate_generator(caffe_rng(), random_distribution); - for (int i = 0; i < n; ++i) { + for (long i = 0; i < n; ++i) { r[i] = variate_generator(); } } template -void caffe_rng_gaussian(const int n, const float mu, +void caffe_rng_gaussian(const long n, const float mu, const float 
sigma, float* r); template -void caffe_rng_gaussian(const int n, const double mu, +void caffe_rng_gaussian(const long n, const double mu, const double sigma, double* r); #ifdef USE_MKL -static void bernoulli_generate(int n, double p, int* r) { +static void bernoulli_generate(long n, double p, int* r) { int seed = 17 + caffe_rng_rand() % 4096; #ifdef _OPENMP @@ -436,13 +436,13 @@ static void bernoulli_generate(int n, double p, int* r) { # pragma omp parallel num_threads(nthr) { const int ithr = omp_get_thread_num(); - const int avg_amount = (n + nthr - 1) / nthr; - const int my_offset = ithr * avg_amount; - const int my_amount = std::min(my_offset + avg_amount, n) - my_offset; + const long avg_amount = (n + nthr - 1) / nthr; + const long my_offset = ithr * avg_amount; + const long my_amount = std::min(my_offset + avg_amount, n) - my_offset; #else { - const int my_amount = n; - const int my_offset = 0; + const long my_amount = n; + const long my_offset = 0; #endif if (my_amount > 0) { @@ -458,7 +458,7 @@ static void bernoulli_generate(int n, double p, int* r) { #endif template -void caffe_rng_bernoulli(const int n, const Dtype p, int* r) { +void caffe_rng_bernoulli(const long n, const Dtype p, int* r) { CHECK_GE(n, 0); CHECK(r); CHECK_GE(p, 0); @@ -469,20 +469,20 @@ void caffe_rng_bernoulli(const int n, const Dtype p, int* r) { boost::bernoulli_distribution random_distribution(p); boost::variate_generator > variate_generator(caffe_rng(), random_distribution); - for (int i = 0; i < n; ++i) { + for (long i = 0; i < n; ++i) { r[i] = variate_generator(); } #endif } template -void caffe_rng_bernoulli(const int n, const double p, int* r); +void caffe_rng_bernoulli(const long n, const double p, int* r); template -void caffe_rng_bernoulli(const int n, const float p, int* r); +void caffe_rng_bernoulli(const long n, const float p, int* r); template -void caffe_rng_bernoulli(const int n, const Dtype p, unsigned int* r) { +void caffe_rng_bernoulli(const long n, const Dtype p, 
unsigned int* r) { CHECK_GE(n, 0); CHECK(r); CHECK_GE(p, 0); @@ -493,74 +493,76 @@ void caffe_rng_bernoulli(const int n, const Dtype p, unsigned int* r) { boost::bernoulli_distribution random_distribution(p); boost::variate_generator > variate_generator(caffe_rng(), random_distribution); - for (int i = 0; i < n; ++i) { + for (long i = 0; i < n; ++i) { r[i] = static_cast(variate_generator()); } #endif } template -void caffe_rng_bernoulli(const int n, const double p, unsigned int* r); +void caffe_rng_bernoulli(const long n, const double p, unsigned int* r); template -void caffe_rng_bernoulli(const int n, const float p, unsigned int* r); +void caffe_rng_bernoulli(const long n, const float p, unsigned int* r); template <> -float caffe_cpu_strided_dot(const int n, const float* x, const int incx, +float caffe_cpu_strided_dot(const long n, const float* x, const int incx, const float* y, const int incy) { return cblas_sdot(n, x, incx, y, incy); } template <> -double caffe_cpu_strided_dot(const int n, const double* x, +double caffe_cpu_strided_dot(const long n, const double* x, const int incx, const double* y, const int incy) { return cblas_ddot(n, x, incx, y, incy); } template <> -size_t caffe_cpu_strided_dot(const int n, const size_t* x, +size_t caffe_cpu_strided_dot(const long n, const size_t* x, const int incx, const size_t* y, const int incy) { + NOT_IMPLEMENTED; return 0; } template -Dtype caffe_cpu_dot(const int n, const Dtype* x, const Dtype* y) { +Dtype caffe_cpu_dot(const long n, const Dtype* x, const Dtype* y) { return caffe_cpu_strided_dot(n, x, 1, y, 1); } template -float caffe_cpu_dot(const int n, const float* x, const float* y); +float caffe_cpu_dot(const long n, const float* x, const float* y); template -double caffe_cpu_dot(const int n, const double* x, const double* y); +double caffe_cpu_dot(const long n, const double* x, const double* y); template -size_t caffe_cpu_dot(const int n, const size_t* x, const size_t* y); +size_t caffe_cpu_dot(const long n, 
const size_t* x, const size_t* y); template <> -float caffe_cpu_asum(const int n, const float* x) { +float caffe_cpu_asum(const long n, const float* x) { return cblas_sasum(n, x, 1); } template <> -double caffe_cpu_asum(const int n, const double* x) { +double caffe_cpu_asum(const long n, const double* x) { return cblas_dasum(n, x, 1); } template <> -size_t caffe_cpu_asum(const int n, const size_t* x) { +size_t caffe_cpu_asum(const long n, const size_t* x) { + NOT_IMPLEMENTED; return 0; } template <> -void caffe_cpu_scale(const int n, const float alpha, const float *x, +void caffe_cpu_scale(const long n, const float alpha, const float *x, float* y) { cblas_scopy(n, x, 1, y, 1); cblas_sscal(n, alpha, y, 1); } template <> -void caffe_cpu_scale(const int n, const double alpha, const double *x, +void caffe_cpu_scale(const long n, const double alpha, const double *x, double* y) { cblas_dcopy(n, x, 1, y, 1); cblas_dscal(n, alpha, y, 1); From e58f2388a9bcd7f2d3d2fa51685402b5376a3d98 Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Thu, 22 Jun 2017 16:49:11 +0800 Subject: [PATCH 21/34] 1. Change the tab format. 2. Change the MLSLROOT to MLSL_ROOT. --- external/mlsl/prepare_mlsl.sh | 26 +++++++++++++------------- src/caffe/blob.cpp | 10 +++++----- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/external/mlsl/prepare_mlsl.sh b/external/mlsl/prepare_mlsl.sh index e5e01e559..b1c76b281 100755 --- a/external/mlsl/prepare_mlsl.sh +++ b/external/mlsl/prepare_mlsl.sh @@ -42,12 +42,12 @@ MLSL_CONTENT_DIR=`echo $ARCHIVE_BASENAME | rev | cut -d "." 
-f 2- | rev` GITHUB_RELEASE_TAG=v2017-Preview MLSLURL="https://github.com/01org/MLSL/releases/download/$GITHUB_RELEASE_TAG/$ARCHIVE_BASENAME" -#echo "[Debug] MLSLROOT value: $MLSLROOT" -VERSION_LINE=`GetVersionName $MLSLROOT` +#echo "[Debug] MLSL_ROOT value: $MLSL_ROOT" +VERSION_LINE=`GetVersionName $MLSL_ROOT` #echo "[Debug] VERSION_LINE value: $VERSION_LINE" -# Check if MLSLROOT is set if positive then set one will be used.. -if [ -z $MLSLROOT ] || [ $VERSION_LINE -lt $VERSION_MATCH ]; then - # ..if MLSLROOT is not set then check if we have MLSL unpacked and installed in proper version +# Check if MLSL_ROOT is set if positive then set one will be used.. +if [ -z $MLSL_ROOT ] || [ $VERSION_LINE -lt $VERSION_MATCH ]; then + # ..if MLSL_ROOT is not set then check if we have MLSL unpacked and installed in proper version FindLibrary $DST #echo "[Debug] LOCALMLSL value inside if: $LOCALMLSL" if [ $LOCALMLSL ]; then @@ -58,8 +58,8 @@ if [ -z $MLSLROOT ] || [ $VERSION_LINE -lt $VERSION_MATCH ]; then fi #echo "[Debug] VERSION_LINE value inside if: $VERSION_LINE" - #if MLSLROOT is not set - if [ -z $MLSLROOT ] ; then + #if MLSL_ROOT is not set + if [ -z $MLSL_ROOT ] ; then #if version is not given, or the version is lower than expected version if [ $VERSION_LINE -lt $VERSION_MATCH ] ; then #Then downloaded, unpacked and installed @@ -69,18 +69,18 @@ if [ -z $MLSLROOT ] || [ $VERSION_LINE -lt $VERSION_MATCH ]; then #install.sh did not support the relative path as the parameter bash $DST/install.sh -s -d $ABS_DST/$ARCHIVE_INSTALL_FOLDERNAME fi - #else: version is just our expected version, no need to donload again, but need to set the MLSLROOT - #do not change the value of MLSLROOT if MLSLROOT is set, but version is not given + #else: version is just our expected version, no need to donload again, but need to set the MLSL_ROOT + #do not change the value of MLSL_ROOT if MLSL_ROOT is set, but version is not given FindLibrary $DST #echo "[Debug] LOCALMLSL value: $LOCALMLSL" 
#echo "[Debug] PWD value: $PWD" - MLSLROOT=$PWD/`echo $LOCALMLSL | sed -e 's/intel64.*$//'` + MLSL_ROOT=$PWD/`echo $LOCALMLSL | sed -e 's/intel64.*$//'` else - #if MLSLROOT is set, but version is not given, or the version is lower than expected version + #if MLSL_ROOT is set, but version is not given, or the version is lower than expected version #not to download our own version, and just use mlsl as the return value of LIBRARIES LIBRARIES="mlsl" fi - #echo "[Debug] MLSLROOT value: $MLSLROOT" + #echo "[Debug] MLSL_ROOT value: $MLSL_ROOT" fi #The simplest implementation of LIBRARIES return value @@ -88,4 +88,4 @@ LIBRARIES="mlsl" #echo "[Debug] LIBRARIES value: $LIBRARIES" # return value to calling script (Makefile,cmake) -echo $MLSLROOT $LIBRARIES +echo $MLSL_ROOT $LIBRARIES diff --git a/src/caffe/blob.cpp b/src/caffe/blob.cpp index 96a3f2341..dd5546bde 100644 --- a/src/caffe/blob.cpp +++ b/src/caffe/blob.cpp @@ -319,11 +319,11 @@ Dtype Blob::asum_data() const { switch (data_->head()) { case SyncedMemory::SYNCED_PRV: { - const Dtype* prv_ptr = prv_data(); - if (prv_ptr == NULL) - return caffe_cpu_asum(count_, cpu_data()); - else - return caffe_cpu_asum(prv_data_count(), prv_data()); + const Dtype* prv_ptr = prv_data(); + if (prv_ptr == NULL) + return caffe_cpu_asum(count_, cpu_data()); + else + return caffe_cpu_asum(prv_data_count(), prv_data()); } case SyncedMemory::HEAD_AT_PRV: return caffe_cpu_asum(prv_data_count(), prv_data()); From 6a614563bca835df4e01d06aef7cbd06bf75923f Mon Sep 17 00:00:00 2001 From: linxinan Date: Thu, 22 Jun 2017 15:31:01 +0800 Subject: [PATCH 22/34] Add option for average pooling include/exclude padding, and default is include. 
Change-Id: Id69a653ea65e6282e8cdc83edcfcfd2f4d736b85 --- include/caffe/layers/mkl_layers.hpp | 1 + src/caffe/layers/mkl_pooling_layer.cpp | 44 ++++++++++++----------- src/caffe/layers/mkldnn_pooling_layer.cpp | 26 ++++++++++---- src/caffe/proto/caffe.proto | 1 + 4 files changed, 45 insertions(+), 27 deletions(-) diff --git a/include/caffe/layers/mkl_layers.hpp b/include/caffe/layers/mkl_layers.hpp index 10ed1cda9..44d44797c 100644 --- a/include/caffe/layers/mkl_layers.hpp +++ b/include/caffe/layers/mkl_layers.hpp @@ -261,6 +261,7 @@ class MKLPoolingLayer : public Layer { int height_, width_; int pooled_height_, pooled_width_; bool global_pooling_; + dnnAlgorithm_t algorithm; Blob rand_idx_; Blob max_idx_; diff --git a/src/caffe/layers/mkl_pooling_layer.cpp b/src/caffe/layers/mkl_pooling_layer.cpp index d5eadba6e..1e654b63d 100644 --- a/src/caffe/layers/mkl_pooling_layer.cpp +++ b/src/caffe/layers/mkl_pooling_layer.cpp @@ -161,6 +161,26 @@ void MKLPoolingLayer::Init( pooled_width_); } + switch (this->layer_param_.pooling_param().pool()) { + case PoolingParameter_PoolMethod_MAX: + this->algorithm = dnnAlgorithmPoolingMax; + break; + case PoolingParameter_PoolMethod_AVE: + if (this->layer_param_.pooling_param().avg_include_pad()) { + this->algorithm = dnnAlgorithmPoolingAvgIncludePadding; + } + else { + this->algorithm = dnnAlgorithmPoolingAvgExcludePadding; + } + break; + case PoolingParameter_PoolMethod_STOCHASTIC: + NOT_IMPLEMENTED; + break; + default: + LOG(FATAL) << "Unknown pooling method."; + } + + size_t dim = 4; size_t src_sizes[4], src_strides[4]; size_t dst_sizes[4], dst_strides[4]; @@ -239,22 +259,6 @@ void MKLPoolingLayer::Forward_cpu(const vector*>& bottom, // We'll output the mask to top[1] if it's of size >1. 
const bool use_top_mask = top.size() > 1; - dnnAlgorithm_t algorithm; - - switch (this->layer_param_.pooling_param().pool()) { - case PoolingParameter_PoolMethod_MAX: - algorithm = dnnAlgorithmPoolingMax; - break; - case PoolingParameter_PoolMethod_AVE: - algorithm = dnnAlgorithmPoolingAvg; - break; - case PoolingParameter_PoolMethod_STOCHASTIC: - NOT_IMPLEMENTED; - break; - default: - LOG(FATAL) << "Unknown pooling method."; - } - dnnError_t status; void* pooling_res[dnnResourceNumber]; @@ -271,13 +275,13 @@ void MKLPoolingLayer::Forward_cpu(const vector*>& bottom, if (NULL == poolingFwd) { // Now create poolingFwd status = dnnPoolingCreateForward(&poolingFwd, NULL, - algorithm, fwd_bottom_data->layout_usr, + this->algorithm, fwd_bottom_data->layout_usr, kernel_size, kernel_stride, src_offset, dnnBorderZeros); CHECK_EQ(status, E_SUCCESS); // Now create poolingBwd status = dnnPoolingCreateBackward(&poolingBwd, NULL, - algorithm, fwd_bottom_data->layout_usr, + this->algorithm, fwd_bottom_data->layout_usr, kernel_size, kernel_stride, src_offset, dnnBorderZeros); CHECK_EQ(status, E_SUCCESS); } @@ -298,7 +302,7 @@ void MKLPoolingLayer::Forward_cpu(const vector*>& bottom, // Now create poolingFwd status = dnnPoolingCreateForward(&poolingFwd, NULL, - algorithm, fwd_bottom_data->layout_int, kernel_size, + this->algorithm, fwd_bottom_data->layout_int, kernel_size, kernel_stride, src_offset, dnnBorderZeros); CHECK_EQ(status, E_SUCCESS); @@ -306,7 +310,7 @@ void MKLPoolingLayer::Forward_cpu(const vector*>& bottom, // Now create poolingBwd status = dnnPoolingCreateBackward(&poolingBwd, NULL, - algorithm, fwd_bottom_data->layout_int, kernel_size, + this->algorithm, fwd_bottom_data->layout_int, kernel_size, kernel_stride, src_offset, dnnBorderZeros); CHECK_EQ(status, E_SUCCESS); diff --git a/src/caffe/layers/mkldnn_pooling_layer.cpp b/src/caffe/layers/mkldnn_pooling_layer.cpp index 27fd4723e..849abd0f3 100644 --- a/src/caffe/layers/mkldnn_pooling_layer.cpp +++ 
b/src/caffe/layers/mkldnn_pooling_layer.cpp @@ -175,7 +175,11 @@ void MKLDNNPoolingLayer::InitPoolingFwd(const vector*>& botto pooling_algorithm = algorithm::pooling_max; break; case PoolingParameter_PoolMethod_AVE: - pooling_algorithm = algorithm::pooling_avg; + if (this->layer_param_.pooling_param().avg_include_pad()) { + pooling_algorithm = algorithm::pooling_avg_include_padding; + }else { + pooling_algorithm = algorithm::pooling_avg_exclude_padding; + } break; case PoolingParameter_PoolMethod_STOCHASTIC: NOT_IMPLEMENTED; @@ -271,7 +275,9 @@ void MKLDNNPoolingLayer::InitPoolingFwd(const vector*>& botto fwd_top_data.reset(new MKLDNNData(usr_top_data_mpd, prv_fwd_top_data_mpd, top[0], this)); fwd_top_data_memory = fwd_top_data->create_output_memory(); - if ( propagation == prop_kind::forward_training && pooling_algorithm != algorithm::pooling_avg) { + if ( propagation == prop_kind::forward_training && + pooling_algorithm != algorithm::pooling_avg_exclude_padding && + pooling_algorithm != algorithm::pooling_avg_include_padding) { indices_pd.reset(new MemPD(poolingFwd_pd->workspace_primitive_desc())); indices_memory.reset(new memory(*indices_pd, reinterpret_cast(mask))); poolingFwd.reset(new pooling_forward(*poolingFwd_pd, *fwd_bottom_data_primitive, *fwd_top_data_memory, *indices_memory)); @@ -319,7 +325,12 @@ void MKLDNNPoolingLayer::InitPoolingBwd(const vector*>& top pooling_algorithm = algorithm::pooling_max; break; case PoolingParameter_PoolMethod_AVE: - pooling_algorithm = algorithm::pooling_avg; + if (this->layer_param_.pooling_param().avg_include_pad()) { + pooling_algorithm = algorithm::pooling_avg_include_padding; + }else { + pooling_algorithm = algorithm::pooling_avg_exclude_padding; + } + break; case PoolingParameter_PoolMethod_STOCHASTIC: NOT_IMPLEMENTED; @@ -346,7 +357,7 @@ void MKLDNNPoolingLayer::InitPoolingBwd(const vector*>& top int32_t pr = this->pad_r_; int32_t pl = this->pad_l_; - + bool top_diff_is_prv = (const_cast(top[0]->prv_diff()) != 
NULL); engine cpu_engine = CpuEngine::Instance().get_engine(); @@ -364,7 +375,7 @@ void MKLDNNPoolingLayer::InitPoolingBwd(const vector*>& top = get_mkldnn_prv_descriptor(top[0]); bwd_cmfmt = static_cast(mem_descr->prv_memory_pd()->desc().data.format); } - + bool bottom_data_is_prv = (const_cast(bottom[0]->prv_data()) != NULL); if (bottom_data_is_prv) { shared_ptr > mem_descr @@ -372,7 +383,7 @@ void MKLDNNPoolingLayer::InitPoolingBwd(const vector*>& top memory::format fwd_prv_bottom_data_mfmt = static_cast(mem_descr->prv_memory_pd()->desc().data.format); #ifdef DEBUG LOG(INFO) << "MKLDNNPoolingLayer::InitPoolingBwd: memory format of prv bottom data is: " << fwd_prv_bottom_data_mfmt; - LOG(INFO) << "MKLDNNPoolingLayer::InitPoolingBwd: Reorder the top and bottom diff to the format of prv bottom data! (Performance consideration)"; + LOG(INFO) << "MKLDNNPoolingLayer::InitPoolingBwd: Reorder the top and bottom diff to the format of prv bottom data! (Performance consideration)"; #endif bwd_cmfmt = fwd_prv_bottom_data_mfmt; } @@ -421,7 +432,8 @@ void MKLDNNPoolingLayer::InitPoolingBwd(const vector*>& top bwd_top_diff->name = "bwd_top_diff_data @ " + this->layer_param_.name(); bwd_top_diff_primitive = bwd_top_diff->create_input(false); - if (pooling_algorithm != algorithm::pooling_avg) + if (pooling_algorithm != algorithm::pooling_avg_include_padding && + pooling_algorithm != algorithm::pooling_avg_exclude_padding) poolingBwd.reset(new pooling_backward(*poolingBwd_pd, *bwd_top_diff_primitive, *indices_memory, *bwd_bottom_diff_memory)); diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index cc9ee65c9..9f9b0443e 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -1458,6 +1458,7 @@ message PoolingParameter { AVE = 1; STOCHASTIC = 2; } + optional bool avg_include_pad = 13 [default = true]; optional PoolMethod pool = 1 [default = MAX]; // The pooling method // Pad, kernel size, and stride are all given as a single value for 
equal // dimensions in height and width or as Y, X pairs. From 31ef95eeb4bd5edfa29e00fce67c547ff9801e5e Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Thu, 22 Jun 2017 22:12:18 +0800 Subject: [PATCH 23/34] Fix the problem that VERSION_LINE may not be an integer if the MLSL_ROOT is provided by User. --- external/mlsl/prepare_mlsl.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/external/mlsl/prepare_mlsl.sh b/external/mlsl/prepare_mlsl.sh index b1c76b281..72fdb95f5 100755 --- a/external/mlsl/prepare_mlsl.sh +++ b/external/mlsl/prepare_mlsl.sh @@ -25,6 +25,10 @@ fi if [ -z $VERSION_LINE ]; then VERSION_LINE=0 fi +if [ -z "$(echo $VERSION_LINE | sed -n "/^[0-9]\+$/p")" ];then + #echo "[Debug] VERSION_LINE value contains other string or flags, not only numbers" + VERSION_LINE=0 +fi echo $VERSION_LINE # Return Version Line } @@ -54,6 +58,8 @@ if [ -z $MLSL_ROOT ] || [ $VERSION_LINE -lt $VERSION_MATCH ]; then #in order to return value to calling script (Makefile,cmake), cannot print other info #echo "[Debug] Some verison of MLSL is unpacked and installed" MLSL_PREVIOUS_CONTENT_DIR=`echo $LOCALMLSL | rev | cut -d "/" -f 4- | cut -d "/" -f -1 | rev` + #echo "[Debug] MLSL_PREVIOUS_CONTENT_DIR value: $MLSL_PREVIOUS_CONTENT_DIR" + #echo "[Debug] DST/MLSL_PREVIOUS_CONTENT_DIR value: $DST/$MLSL_PREVIOUS_CONTENT_DIR" VERSION_LINE=`GetVersionName $DST/$MLSL_PREVIOUS_CONTENT_DIR` fi #echo "[Debug] VERSION_LINE value inside if: $VERSION_LINE" From 5bb522e42f7931613eb5f97bc70f7cdf374a5367 Mon Sep 17 00:00:00 2001 From: "Gong, Jiong" Date: Fri, 23 Jun 2017 01:06:38 +0800 Subject: [PATCH 24/34] fix BN bug of multinode --- include/caffe/layer.hpp | 3 +-- include/caffe/layers/batch_norm_layer.hpp | 7 +++---- include/caffe/layers/mkl_layers.hpp | 7 +++---- include/caffe/layers/mkldnn_layers.hpp | 6 +++--- include/caffe/multinode/apply_mn_param.hpp | 7 +++++++ include/caffe/multinode/multi_sync.hpp | 2 ++ src/caffe/layer.cpp | 9 ++------- src/caffe/multinode/apply_mn_param.cpp 
| 19 +++++++++++++++++++ src/caffe/net.cpp | 14 +------------- 9 files changed, 41 insertions(+), 33 deletions(-) diff --git a/include/caffe/layer.hpp b/include/caffe/layer.hpp index 9b535d811..45d65c799 100644 --- a/include/caffe/layer.hpp +++ b/include/caffe/layer.hpp @@ -120,6 +120,7 @@ class Layer { public: MLSL::Operation *layerOp{ nullptr }; mn::Distribution &GetDistribution(); + virtual bool ParamNeedReduce(int param_id) { return true; } protected: virtual bool Bypass(const vector*>& bottom, @@ -128,8 +129,6 @@ class Layer { virtual void MultinodeSetUp(const vector*>& bottom, const vector*>& top); - virtual bool ParamNeedReduce(int param_id) { return true; } - #endif /* USE_MLSL */ public: diff --git a/include/caffe/layers/batch_norm_layer.hpp b/include/caffe/layers/batch_norm_layer.hpp index 2cea50aaa..e83bab953 100644 --- a/include/caffe/layers/batch_norm_layer.hpp +++ b/include/caffe/layers/batch_norm_layer.hpp @@ -89,6 +89,9 @@ class BatchNormLayer : public Layer { virtual inline const char* type() const { return "BatchNorm"; } virtual inline int ExactNumBottomBlobs() const { return 1; } virtual inline int ExactNumTopBlobs() const { return 1; } +#ifdef USE_MLSL + virtual bool ParamNeedReduce(int param_id) { return false; } +#endif protected: virtual void Forward_cpu(const vector*>& bottom, @@ -114,10 +117,6 @@ class BatchNormLayer : public Layer { const Dtype* data_to_be_replicated, FuncTy op_func); -#ifdef USE_MLSL - virtual bool ParamNeedReduce(int param_id) { return false; } -#endif - Blob mean_, variance_, temp_, x_norm_; bool use_global_stats_; Dtype moving_average_fraction_; diff --git a/include/caffe/layers/mkl_layers.hpp b/include/caffe/layers/mkl_layers.hpp index 10ed1cda9..294ae325b 100644 --- a/include/caffe/layers/mkl_layers.hpp +++ b/include/caffe/layers/mkl_layers.hpp @@ -424,6 +424,9 @@ class MKLBatchNormLayer : public Layer { virtual inline const char* type() const { return "BatchNorm"; } virtual inline int ExactNumBottomBlobs() const { 
return 1; } virtual inline int ExactNumTopBlobs() const { return 1; } +#ifdef USE_MLSL + virtual bool ParamNeedReduce(int param_id) { return param_id >= 3; } +#endif protected: virtual void Forward_cpu(const vector*>& bottom, @@ -438,10 +441,6 @@ class MKLBatchNormLayer : public Layer { void Init(const vector*>& bottom, const vector*>& top); -#ifdef USE_MLSL - virtual bool ParamNeedReduce(int param_id) { return param_id >= 3; } -#endif - Dtype moving_average_fraction_; Dtype eps_; bool use_weight_bias_; diff --git a/include/caffe/layers/mkldnn_layers.hpp b/include/caffe/layers/mkldnn_layers.hpp index 57be322d4..c4afb2517 100644 --- a/include/caffe/layers/mkldnn_layers.hpp +++ b/include/caffe/layers/mkldnn_layers.hpp @@ -77,6 +77,9 @@ class MKLDNNBatchNormLayer : public MKLDNNLayer, public Layer { PERFORMANCE_EVENT_ID_RESET(perf_id_bw_); } ~MKLDNNBatchNormLayer() {} +#ifdef USE_MLSL + virtual bool ParamNeedReduce(int param_id) { return param_id >= 3; } +#endif protected: virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); @@ -88,9 +91,6 @@ class MKLDNNBatchNormLayer : public MKLDNNLayer, public Layer { , const vector*>& bottom); virtual void Backward_gpu(const vector*>& top, const vector& propagate_down , const vector*>& bottom); -#ifdef USE_MLSL - virtual bool ParamNeedReduce(int param_id) { return param_id >= 3; } -#endif private: void InitBatchNorm(const vector*>& bottom, const vector*>& top); void InitBatchNormBwd(const vector*>& top, diff --git a/include/caffe/multinode/apply_mn_param.hpp b/include/caffe/multinode/apply_mn_param.hpp index df48bd83e..1613909de 100644 --- a/include/caffe/multinode/apply_mn_param.hpp +++ b/include/caffe/multinode/apply_mn_param.hpp @@ -41,6 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifdef USE_MLSL #include "caffe/proto/caffe.pb.h" +#include "caffe/net.hpp" namespace caffe { /** @@ -51,6 +52,12 @@ template void ApplyMultinodeParams(const NetParameter& param, NetParameter* param_with_mn); +/** + * @brief Copy per-layer parameters from a Net object. + */ +template +void CopyMultinodeParamsFromNet(const Net *net, NetParameter *param); + /** * @brief Revert all the multinode changes from NetParameter */ diff --git a/include/caffe/multinode/multi_sync.hpp b/include/caffe/multinode/multi_sync.hpp index b6e29c57b..2d4c566ae 100644 --- a/include/caffe/multinode/multi_sync.hpp +++ b/include/caffe/multinode/multi_sync.hpp @@ -203,6 +203,7 @@ namespace caffe { std::vector ¶m_ids = layer_param_ids[layer_id]; for (int i = 0; i < param_ids.size(); ++i) { + if (!layer->ParamNeedReduce(param_ids[i])) continue; if (CAN_USE_PRV(net_params[param_ids[i]])) { layer->layerOp->GetParameterSet(i)->StartGradientComm((void *) net_params[param_ids[i]]->mutable_prv_diff()); } else { @@ -220,6 +221,7 @@ namespace caffe { std::vector ¶m_ids = layer_param_ids[layer_id]; for (int i=0; iParamNeedReduce(param_ids[i])) continue; Dtype *delwt_buf{(Dtype *) layer->layerOp->GetParameterSet(i)->WaitGradientComm()}; if (delwt_buf) { if (CAN_USE_PRV(net_params[param_ids[i]])) { diff --git a/src/caffe/layer.cpp b/src/caffe/layer.cpp index df8f68d38..ad6422694 100644 --- a/src/caffe/layer.cpp +++ b/src/caffe/layer.cpp @@ -87,15 +87,13 @@ void Layer::MultinodeSetUp(const vector*>& bottom, mn::GetCanonicalMnParam(num_nodes, model_parts); int data_parts = num_nodes / model_parts; - if (data_parts <= 1) return; + if (data_parts <= 1 || this->blobs_.size() == 0) return; // We only initialize data parallelism here so operation type is // irrelevant here, hard-code to OT_CC mn::OpRegInfo reg_info(mn::train::get_session(), MLSL::OT_CC); reg_info.set_name(this->layer_param().name()); - bool has_parameters = false; for (int i = 0; i < this->blobs_.size(); i++) { - if (!ParamNeedReduce(i)) 
continue; int hw = 1, ic = 1, oc = 1; const vector &shape = this->blobs_[i]->shape(); CHECK_GT(shape.size(), 0); @@ -105,11 +103,8 @@ void Layer::MultinodeSetUp(const vector*>& bottom, // Note that MLSL expects the entire weights from a model group. // So we should multiply by model_parts here. reg_info.add_parameter_set(ic * oc * model_parts, hw); - has_parameters = true; - } - if (has_parameters) { - this->layerOp = mn::train::add_operation(reg_info, this->GetDistribution()); } + this->layerOp = mn::train::add_operation(reg_info, this->GetDistribution()); } #endif diff --git a/src/caffe/multinode/apply_mn_param.cpp b/src/caffe/multinode/apply_mn_param.cpp index a2ec34765..29e32079b 100644 --- a/src/caffe/multinode/apply_mn_param.cpp +++ b/src/caffe/multinode/apply_mn_param.cpp @@ -253,6 +253,23 @@ void ApplyMultinodeParams(const NetParameter& param, } } +template +void CopyMultinodeParamsFromNet(const Net *net, NetParameter *param) { + // set per-layer multi-node parameters before adjusting net proto + for (int i = 0; i < param->layer_size(); i++) { + LayerParameter* source_layer = param->mutable_layer(i); + const string& source_layer_name = source_layer->name(); + int target_layer_id = 0; + while (target_layer_id != net->layer_names().size() && + net->layer_names()[target_layer_id] != source_layer_name) { + ++target_layer_id; + } + if (target_layer_id == net->layer_names().size()) continue; + *source_layer->mutable_multinode() = + net->layers()[target_layer_id]->layer_param().multinode(); + } +} + template void RevertMultinodeParams(NetParameter* param, bool write_diff) { NetParameter orig_param; @@ -315,6 +332,8 @@ template void ApplyMultinodeParams(const NetParameter& param, NetParameter* param_with_mn); template void ApplyMultinodeParams(const NetParameter& param, NetParameter* param_with_mn); +template void CopyMultinodeParamsFromNet(const Net *net, NetParameter *param); +template void CopyMultinodeParamsFromNet(const Net *net, NetParameter *param); template 
void RevertMultinodeParams(NetParameter* param, bool write_diff); template void RevertMultinodeParams(NetParameter* param, bool write_diff); } // namespace caffe diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 9ea85e973..dfcc941da 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -1222,19 +1222,7 @@ void Net::CopyTrainedLayersFrom(const NetParameter& param_inp) { #ifdef USE_MLSL NetParameter param_mn; if (mn::is_multinode()) { - // set per-layer multi-node parameters before adjusting net proto - for (int i = 0; i < param.layer_size(); i++) { - LayerParameter* source_layer = param.mutable_layer(i); - const string& source_layer_name = source_layer->name(); - int target_layer_id = 0; - while (target_layer_id != layer_names_.size() && - layer_names_[target_layer_id] != source_layer_name) { - ++target_layer_id; - } - if (target_layer_id == layer_names_.size()) continue; - *source_layer->mutable_multinode() = - layers_[target_layer_id]->layer_param().multinode(); - } + CopyMultinodeParamsFromNet(this, ¶m); ApplyMultinodeParams(param, ¶m_mn); param = param_mn; } From cfa6761d2574eee107bcf008d4167ca2da7cde4c Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Fri, 23 Jun 2017 10:34:17 +0800 Subject: [PATCH 25/34] Fix (ICL-128) GoogleNet-V2 inference resut with MKLDNN engine will be -nan with two times running --- src/caffe/layers/mkldnn_batch_norm_layer.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/caffe/layers/mkldnn_batch_norm_layer.cpp b/src/caffe/layers/mkldnn_batch_norm_layer.cpp index b479f8828..dd1b7f7b6 100644 --- a/src/caffe/layers/mkldnn_batch_norm_layer.cpp +++ b/src/caffe/layers/mkldnn_batch_norm_layer.cpp @@ -291,8 +291,11 @@ void MKLDNNBatchNormLayer::Forward_cpu(const vector*>& bottom Dtype *mean_buffer_ = (Dtype *)(mean_memory->get_data_handle()); Dtype *variance_buffer_ = (Dtype *)(variance_memory->get_data_handle()); - caffe_scal(this->channels_, scale_factor, mean_buffer_); - caffe_scal(this->channels_, 
scale_factor, variance_buffer_); + //TODO: optimize, do this operation in the InitBatchNorm, so no need to calculate each time + caffe_cpu_scale(this->blobs_[0]->count(), scale_factor, + this->blobs_[0]->cpu_data(), mean_buffer_); + caffe_cpu_scale(this->blobs_[1]->count(), scale_factor, + this->blobs_[1]->cpu_data(), variance_buffer_); } if (use_weight_bias_) { Dtype* scaleShift_buffer_ = (Dtype *)(scaleshift_memory->get_data_handle()); From 385e7705bc0e98203b79ef50ddf3a12dd29cfaa4 Mon Sep 17 00:00:00 2001 From: "Shen, Haihao" Date: Fri, 23 Jun 2017 14:13:04 +0800 Subject: [PATCH 26/34] Update Dockfile --- docker/standalone/cpu-centos/Dockerfile | 2 +- docker/standalone/cpu-ubuntu/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/standalone/cpu-centos/Dockerfile b/docker/standalone/cpu-centos/Dockerfile index ea05a016d..b372316e4 100644 --- a/docker/standalone/cpu-centos/Dockerfile +++ b/docker/standalone/cpu-centos/Dockerfile @@ -42,7 +42,7 @@ ENV CLONE_TAG=master RUN git clone -b ${CLONE_TAG} --depth 1 https://github.com/intel/caffe.git . && \ for req in $(cat python/requirements.txt) pydot; do pip --no-cache-dir install $req; done && \ mkdir build && cd build && \ - cmake -DCPU_ONLY=1 -DUSE_MKL2017_AS_DEFAULT_ENGINE=1 -DCMAKE_BUILD_TYPE=Release .. && \ + cmake -DCPU_ONLY=1 -DCMAKE_BUILD_TYPE=Release .. && \ make all -j"$(nproc)" ENV PYCAFFE_ROOT $CAFFE_ROOT/python diff --git a/docker/standalone/cpu-ubuntu/Dockerfile b/docker/standalone/cpu-ubuntu/Dockerfile index 255e3da14..388654d72 100644 --- a/docker/standalone/cpu-ubuntu/Dockerfile +++ b/docker/standalone/cpu-ubuntu/Dockerfile @@ -31,7 +31,7 @@ ENV CLONE_TAG=master RUN git clone -b ${CLONE_TAG} --depth 1 https://github.com/intel/caffe.git . && \ for req in $(cat python/requirements.txt) pydot; do pip install $req; done && \ mkdir build && cd build && \ - cmake -DCPU_ONLY=1 -DUSE_MKL2017_AS_DEFAULT_ENGINE=1 -DCMAKE_BUILD_TYPE=Release .. 
&& \ + cmake -DCPU_ONLY=1 -DCMAKE_BUILD_TYPE=Release .. && \ make all -j"$(nproc)" ENV PYCAFFE_ROOT $CAFFE_ROOT/python From d4117e40053ebb71c436088058f2dd2b5dbe06c2 Mon Sep 17 00:00:00 2001 From: "Shen, Haihao" Date: Fri, 23 Jun 2017 15:40:16 +0800 Subject: [PATCH 27/34] Add Winograd support --- include/caffe/layers/mkldnn_layers.hpp | 1 + src/caffe/layers/mkldnn_convolution_layer.cpp | 1104 +++++++++-------- src/caffe/proto/caffe.proto | 1 + 3 files changed, 574 insertions(+), 532 deletions(-) diff --git a/include/caffe/layers/mkldnn_layers.hpp b/include/caffe/layers/mkldnn_layers.hpp index c4afb2517..a7bb8f659 100644 --- a/include/caffe/layers/mkldnn_layers.hpp +++ b/include/caffe/layers/mkldnn_layers.hpp @@ -164,6 +164,7 @@ class MKLDNNConvolutionLayer : public MKLDNNLayer , public ConvolutionLay , bwdw_top_diff_primitive, bwdw_bottom_data_primitive; int32_t width_, height_, width_out_, height_out_, kernel_w_, kernel_h_, stride_w_, stride_h_; int pad_w_, pad_h_; + mkldnn::algorithm conv_algorithm; PERFORMANCE_EVENT_ID_DECL(perf_id_fw_); PERFORMANCE_EVENT_ID_DECL(perf_id_bw_); diff --git a/src/caffe/layers/mkldnn_convolution_layer.cpp b/src/caffe/layers/mkldnn_convolution_layer.cpp index 649224f0a..a9649ad79 100644 --- a/src/caffe/layers/mkldnn_convolution_layer.cpp +++ b/src/caffe/layers/mkldnn_convolution_layer.cpp @@ -1,477 +1,517 @@ -/* -All modification made by Intel Corporation: © 2016 Intel Corporation - -All contributions by the University of California: -Copyright (c) 2014, 2015, The Regents of the University of California (Regents) -All rights reserved. - -All other contributions: -Copyright (c) 2014, 2015, the respective contributors -All rights reserved. 
-For the list of contributors go to https://github.com/BVLC/caffe/blob/master/CONTRIBUTORS.md - - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#ifdef MKLDNN_SUPPORTED -#include -#include -#include - -#include "caffe/filler.hpp" -#include "caffe/layer.hpp" -#include "caffe/layers/mkldnn_layers.hpp" -//#include "mkl_service.h" - -// TODO: Correct process case if there are no bias -// TODO: Exception handling - mkl-dnn produces exceptions on errors - -namespace caffe { - -template -MKLDNNConvolutionLayer::MKLDNNConvolutionLayer(const LayerParameter& param) - : MKLDNNLayer(), ConvolutionLayer(param) - , fwd_bottom_data(NULL), fwd_top_data(NULL), fwd_weights_data(NULL), fwd_bias_data(NULL) - , bwdd_weights_data(NULL), bwdw_bottom_data(NULL) - , bwdd_bottom_diff(NULL), bwdd_top_diff(NULL) - , bwdw_top_diff(NULL), bwdw_weights_diff(NULL), bwdw_bias_diff(NULL) - , convFwd_pd(NULL), convBwdData_pd(NULL), convBwdWeights_pd(NULL) - , fwd_top_data_memory(NULL), bwdd_bottom_diff_memory(NULL) - , bwdw_weights_diff_memory(NULL), bwdw_bias_diff_memory(NULL) - , fwd_bottom_data_primitive(NULL), fwd_weights_data_primitive(NULL), fwd_bias_data_primitive(NULL) - , bwdd_top_diff_primitive(NULL), bwdd_weights_data_primitive(NULL) - , bwdw_top_diff_primitive(NULL), bwdw_bottom_data_primitive(NULL) - , width_(0), height_(0), width_out_(0), height_out_(0), kernel_w_(0), kernel_h_(0) - , stride_w_(0), stride_h_(0), pad_w_(0), pad_h_(0) -{ - PERFORMANCE_EVENT_ID_RESET(perf_id_fw_); - PERFORMANCE_EVENT_ID_RESET(perf_id_bw_); - PERFORMANCE_EVENT_ID_RESET(perf_id_bw_weights_); -} - -template -void MKLDNNConvolutionLayer::compute_output_shape() -{ - ConvolutionLayer::compute_output_shape(); - this->height_out_ = (this->height_ + 2 * this->pad_h_ - this->kernel_h_) - / this->stride_h_ + 1; - this->width_out_ = (this->width_ + 2 * this->pad_w_ - this->kernel_w_) - / this->stride_w_ + 1; -} - -template -void MKLDNNConvolutionLayer::init_properties(const vector*>& bottom - , const vector*>& top) -{ - this->stride_w_ = this->stride_.cpu_data()[1]; - this->stride_h_ = this->stride_.cpu_data()[0]; - this->width_ = bottom[0]->width(); - 
this->height_ = bottom[0]->height(); - this->pad_w_ = this->pad_.cpu_data()[1]; - this->pad_h_ = this->pad_.cpu_data()[0]; - this->kernel_w_ = this->kernel_shape_.cpu_data()[1]; - this->kernel_h_ = this->kernel_shape_.cpu_data()[0]; -} - -template -void MKLDNNConvolutionLayer::LayerSetUp(const vector*>& bottom - , const vector*>& top) -{ - VLOG(1) << "<< MKLDNNConvolutionLayer::LayerSetUp: " << this->layer_param_.name(); - ConvolutionLayer::LayerSetUp(bottom, top); - init_properties(bottom, top); - this->bottom_shape_ = &bottom[0]->shape(); -} - -template -void MKLDNNConvolutionLayer::Reshape(const vector*>& bottom - , const vector*>& top) -{ - VLOG(1) << " MKLDNNConvolutionLayer::Reshape: " << this->layer_param_.name(); - BaseConvolutionLayer::ReshapeForMKL(bottom, top); - init_properties(bottom, top); -} - -template -void MKLDNNConvolutionLayer::InitConvolutionFwd(const vector*>& bottom - , const vector*>& top) -{ - if (std::is_same::value) NOT_IMPLEMENTED; - auto propagation = this->phase_ == TEST ? 
prop_kind::forward_scoring : prop_kind::forward_training; - bool relu = this->layer_param_.convolution_param().relu(); - Dtype negative_slope = 0; - if(relu) - { - propagation = prop_kind::forward_inference; - negative_slope = this->layer_param_.relu_param().negative_slope(); - } - - int32_t g = std::max(this->group_, 1); - int32_t n = this->num_; - int32_t iw = this->width_; - int32_t ih = this->height_; - int32_t ic = this->channels_; - - int32_t ow = this->width_out_; - int32_t oh = this->height_out_; - int32_t oc = this->num_output_; - - int32_t kw = this->kernel_w_; - int32_t kh = this->kernel_h_; - - memory::dims convolutionStrides {this->stride_h_, this->stride_w_}; - memory::dims padding {this->pad_h_, this->pad_w_}; - - // ---- Initialize memory descriptors (fromat = any) to create convolution descriptor ------------- - memory::data_type mpcsn = memory::data_type::f32; - memory::format mfmt_any = memory::format::any; - - memory::dims bottom_tz = {n, ic, ih, iw}; - memory::dims bias_tz = {oc}; - memory::dims top_tz = {n, oc, oh, ow}; - memory::dims weights_tz = (g!= 1) ? 
memory::dims{g, oc/g, ic/g, kh, kw} : memory::dims{oc, ic, kh, kw}; - - // ---- Memory descriptors for initializing of convolution primitive descriptor ------------- - memory::desc init_bottom_md({bottom_tz}, mpcsn, mfmt_any); - memory::desc init_bias_md({bias_tz}, mpcsn, mfmt_any); - memory::desc init_top_md({top_tz}, mpcsn, mfmt_any); - memory::desc init_weights_md({weights_tz}, mpcsn, mfmt_any); - - // ---- Initialize convolution primitive descriptor ------------- - shared_ptr convFwd_desc; - if (this->bias_term_) { - convFwd_desc.reset(new convolution_forward::desc(propagation, algorithm::convolution_direct - , init_bottom_md, init_weights_md, init_bias_md, init_top_md - , convolutionStrides, padding, padding, padding_kind::zero)); - } else { - convFwd_desc.reset(new convolution_forward::desc(propagation, algorithm::convolution_direct - , init_bottom_md, init_weights_md, init_top_md - , convolutionStrides, padding, padding, padding_kind::zero)); - } - shared_ptr convReluFwd_desc; - if(relu) convReluFwd_desc.reset(new convolution_relu_forward::desc(*convFwd_desc, negative_slope)); - // ---- Determining engine to use ----------------------- - std::string subengines = this->layer_param_.engine(); - if (subengines == "" || subengines == "MKLDNN") - subengines = "MKLDNN:CPU"; - EngineParser ep(subengines); - unsigned subEngineIndex = 0; - shared_ptr convReluFwd_pd; - for(; subEngineIndex < ep.getNumberOfSubEngines(); subEngineIndex++) { - try { - convFwd_pd.reset(new convolution_forward::primitive_desc(*convFwd_desc, - ep.getMKLDNNSubEngine(subEngineIndex))); - if(relu) convReluFwd_pd.reset(new convolution_relu_forward::primitive_desc(*convReluFwd_desc, - ep.getMKLDNNSubEngine(subEngineIndex))); - } - catch(...) 
{ - continue; - } - break; - } - - CHECK(convFwd_pd); - engine cpu_engine = CpuEngine::Instance().get_engine(); - - // ---- Create priv memory primitive descriptors stored as class members ------------- - typedef typename memory::primitive_desc MemPD; // short name for memory::primitive_desc - - shared_ptr prv_fwd_bottom_data_memory_pd(new MemPD(convFwd_pd->src_primitive_desc())); - shared_ptr prv_fwd_top_data_memory_pd(new MemPD(convFwd_pd->dst_primitive_desc())); - shared_ptr prv_fwd_weights_data_memory_pd(new MemPD(convFwd_pd->weights_primitive_desc())); - - // ---- Create usr memory primitive descriptors ------------- - memory::format mfmt_nchw = memory::format::nchw; - memory::format weights_mfmt = (g!= 1) ? memory::format::goihw : memory::format::oihw; - - // TODO: There should not be a problem to use this for Backward as well - shared_ptr usr_bottom_data_memory_pd(new MemPD({{bottom_tz}, mpcsn, mfmt_nchw}, cpu_engine)); - shared_ptr usr_bias_data_memory_pd(new MemPD({{bias_tz}, mpcsn, memory::format::x}, cpu_engine)); - shared_ptr usr_top_data_memory_pd(new MemPD({{top_tz}, mpcsn, mfmt_nchw}, cpu_engine)); - shared_ptr usr_weights_data_memory_pd(new MemPD({{weights_tz}, mpcsn, weights_mfmt}, cpu_engine)); - - - // --- init primitive and prv_memory descriptors ---------------------- - fwd_bottom_data.reset(new MKLDNNData(usr_bottom_data_memory_pd, prv_fwd_bottom_data_memory_pd, bottom[0], this)); - fwd_bottom_data ->name = "fwd_bottom_data @ " + this->layer_param_.name(); - fwd_bottom_data_primitive = fwd_bottom_data->create_input(false); - - fwd_top_data.reset(new MKLDNNData(usr_top_data_memory_pd, prv_fwd_top_data_memory_pd, top[0], this)); - fwd_top_data ->name = "fwd_top_data @ " + this->layer_param_.name(); - fwd_top_data_memory = fwd_top_data->create_output_memory(); - - fwd_weights_data.reset(new MKLDNNData(usr_weights_data_memory_pd, prv_fwd_weights_data_memory_pd, this->blobs_[0].get(), this)); - fwd_weights_data->name = "fwd_weights_data @ " + 
this->layer_param_.name(); - fwd_weights_data_primitive = fwd_weights_data->create_input(true); - - if (this->bias_term_) { - shared_ptr prv_fwd_bias_data_memory_pd(new MemPD(convFwd_pd->bias_primitive_desc())); - fwd_bias_data.reset(new MKLDNNData(usr_bias_data_memory_pd, prv_fwd_bias_data_memory_pd, this->blobs_[1].get(), this)); - fwd_bias_data->name = "fwd_bias_data @ " + this->layer_param_.name(); - fwd_bias_data_primitive = fwd_bias_data->create_input(true); - if(relu) { - convFwd.reset(new convolution_relu_forward(*convReluFwd_pd - , *fwd_bottom_data_primitive, *fwd_weights_data_primitive - , *fwd_bias_data_primitive, *fwd_top_data_memory)); - } else { - convFwd.reset(new convolution_forward(*convFwd_pd - , *fwd_bottom_data_primitive, *fwd_weights_data_primitive - , *fwd_bias_data_primitive, *fwd_top_data_memory)); - } - fwd_bias_data->set_mkldnn_primitive(convFwd); - } else { - if(relu) { - convFwd.reset(new convolution_relu_forward(*convReluFwd_pd - , *fwd_bottom_data_primitive, *fwd_weights_data_primitive - , *fwd_top_data_memory)); - } else { - convFwd.reset(new convolution_forward(*convFwd_pd - , *fwd_bottom_data_primitive, *fwd_weights_data_primitive - , *fwd_top_data_memory)); - } - } - fwd_bottom_data->set_mkldnn_primitive(convFwd); //Wrong passed primitive! (TODO: Checking!) - //MKLDNNPrimitive fwd_bottom_data_primitive_transfer(fwd_bottom_data_primitive); - //fwd_bottom_data->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); - - fwd_top_data->set_mkldnn_primitive(convFwd); - - //fwd_weights_data->set_mkldnn_primitive(convFwd); //Wrong passed primitive! (For sure!) - MKLDNNPrimitive fwd_weights_data_primitive_transfer(fwd_weights_data_primitive); - fwd_weights_data->set_mkldnn_primitive(fwd_weights_data_primitive_transfer); - - // Names are for debugging purposes only. 
-} - -template -void MKLDNNConvolutionLayer::Forward_cpu(const vector*>& bottom - , const vector*>& top) -{ - VLOG(1) << "MKLDNNConvolutionLayer::Forward_cpu: " << this->layer_param_.name(); - if( convFwd_pd == NULL) - InitConvolutionFwd(bottom, top); - // making reorders if needed. - fwd_bottom_data->sync_before_read(); - fwd_weights_data->sync_before_read(); - if (this->bias_term_) - fwd_bias_data->sync_before_read(); - // update top that head at prv - fwd_top_data->sync_before_write(); - - PERFORMANCE_EVENT_ID_INIT(perf_id_fw_, PERFORMANCE_MKLDNN_NAME("FW")); - PERFORMANCE_MEASUREMENT_BEGIN(); - convFwd.submit(); - PERFORMANCE_MEASUREMENT_END_ID(perf_id_fw_); -} - - -template -void MKLDNNConvolutionLayer::InitConvolutionBwd(const vector*>& top - , const vector& propagate_down - , const vector*>& bottom) -{ - if (std::is_same::value) NOT_IMPLEMENTED; - - int32_t g = std::max(this->group_, 1); - int32_t n = this->num_; - int32_t iw = this->width_; - int32_t ih = this->height_; - int32_t ic = this->channels_; - - int32_t ow = this->width_out_; - int32_t oh = this->height_out_; - int32_t oc = this->num_output_; - - int32_t kw = this->kernel_w_; - int32_t kh = this->kernel_h_; - - memory::dims convolutionStrides {this->stride_h_, this->stride_w_}; - memory::dims padding {this->pad_h_, this->pad_w_}; - - // ---- Initialize memory descriptors (fromat = any) to create convolution descriptor ------------- - memory::data_type mpcsn = memory::data_type::f32; - memory::format mfmt_any = memory::format::any; - - memory::dims bottom_tz = {n, ic, ih, iw}; - memory::dims bias_tz = {oc}; - memory::dims top_tz = {n, oc, oh, ow}; - memory::dims weights_tz = ( g!= 1) ? 
memory::dims{g, oc/g, ic/g, kh, kw} : memory::dims{oc, ic, kh, kw}; - - // ---- Memory descriptors for initializing of convolution primitive descriptor ------------- - memory::desc init_bottom_md({bottom_tz}, mpcsn, mfmt_any); - memory::desc init_bias_md({bias_tz}, mpcsn, mfmt_any); - memory::desc init_top_md({top_tz}, mpcsn, mfmt_any); - memory::desc init_weights_md({weights_tz}, mpcsn, mfmt_any); - - // ---- Initialize convolution primitive descriptor ------------- - shared_ptr convBwdData_desc; - shared_ptr convBwdWeights_desc; - if (this->bias_term_) { - convBwdWeights_desc.reset(new convolution_backward_weights::desc(algorithm::convolution_direct - , init_bottom_md, init_weights_md, init_bias_md, init_top_md - , convolutionStrides, padding, padding, padding_kind::zero)); - } else { - convBwdWeights_desc.reset(new convolution_backward_weights::desc(algorithm::convolution_direct - , init_bottom_md, init_weights_md, init_top_md - , convolutionStrides, padding, padding, padding_kind::zero)); - } - - convBwdData_desc.reset(new convolution_backward_data::desc(algorithm::convolution_direct - , init_bottom_md, init_weights_md, init_top_md - , convolutionStrides, padding, padding, padding_kind::zero)); - - // ---- Determining engine to use ----------------------- - std::string subengines = this->layer_param_.engine(); - if (subengines == "" || subengines == "MKLDNN") - subengines = "MKLDNN:CPU"; - EngineParser ep(subengines); - unsigned subEngineIndex = 0; - for(; subEngineIndex < ep.getNumberOfSubEngines(); subEngineIndex++) { - try { - convBwdData_pd.reset(new convolution_backward_data::primitive_desc(*convBwdData_desc, - ep.getMKLDNNSubEngine(subEngineIndex), *convFwd_pd)); - - convBwdWeights_pd.reset(new convolution_backward_weights::primitive_desc(*convBwdWeights_desc, - ep.getMKLDNNSubEngine(subEngineIndex), *convFwd_pd)); - } - catch(...) 
{ - continue; - } - break; - } - CHECK(convBwdData_pd); - CHECK(convBwdWeights_pd); - engine cpu_engine = CpuEngine::Instance().get_engine(); - - // ---- Create priv memory primitive descriptors stored as class members ------------- - typedef typename memory::primitive_desc MemPD; // short name for memory::primitive_desc - - shared_ptr prv_bwdd_bottom_diff_memory_pd(new MemPD(convBwdData_pd->diff_src_primitive_desc())); - shared_ptr prv_bwdd_top_diff_memory_pd(new MemPD(convBwdData_pd->diff_dst_primitive_desc())); - shared_ptr prv_bwdd_weights_data_memory_pd(new MemPD(convBwdData_pd->weights_primitive_desc())); - - shared_ptr prv_bwdw_bottom_data_memory_pd(new MemPD(convBwdWeights_pd->src_primitive_desc())); - shared_ptr prv_bwdw_top_diff_memory_pd(new MemPD(convBwdWeights_pd->diff_dst_primitive_desc())); - shared_ptr prv_bwdw_weights_diff_memory_pd(new MemPD(convBwdWeights_pd->diff_weights_primitive_desc())); - - // ---- Create usr memory primitive descriptors ------------- - memory::format mfmt_nchw = memory::format::nchw; - memory::format weights_mfmt = ( g!= 1) ? memory::format::goihw : memory::format::oihw; - - // ???!!! can we use usr memory primitive descrittors for backward?? 
- shared_ptr usr_bottom_data_memory_pd(new MemPD({{bottom_tz}, mpcsn, mfmt_nchw}, cpu_engine)); - shared_ptr usr_bias_data_memory_pd(new MemPD({{bias_tz}, mpcsn, memory::format::x}, cpu_engine)); - shared_ptr usr_top_data_memory_pd(new MemPD({{top_tz}, mpcsn, mfmt_nchw}, cpu_engine)); - shared_ptr usr_weights_data_memory_pd(new MemPD({{weights_tz}, mpcsn, weights_mfmt}, cpu_engine)); - - - // --- init primitive and prv_memory descriptors ---------------------- - bwdd_bottom_diff.reset(new MKLDNNDiff(usr_bottom_data_memory_pd, prv_bwdd_bottom_diff_memory_pd, bottom[0], this)); - bwdd_bottom_diff ->name = "bwdd_bottom_diff @ " + this->layer_param_.name(); - bwdd_bottom_diff_memory = bwdd_bottom_diff->create_output_memory(); - bwdw_bottom_data.reset(new MKLDNNData(usr_bottom_data_memory_pd, prv_bwdw_bottom_data_memory_pd, bottom[0], this)); - bwdw_bottom_data ->name = "bwdw_bottom_data @ " + this->layer_param_.name(); - bwdw_bottom_data_primitive = bwdw_bottom_data->create_input(false); - - bwdd_top_diff.reset(new MKLDNNDiff(usr_top_data_memory_pd, prv_bwdd_top_diff_memory_pd, top[0], this)); - bwdd_top_diff ->name = "bwdd_top_diff @ " + this->layer_param_.name(); - bwdd_top_diff_primitive = bwdd_top_diff->create_input(false); - bwdw_top_diff.reset(new MKLDNNDiff(usr_top_data_memory_pd, prv_bwdw_top_diff_memory_pd, top[0], this)); - bwdw_top_diff ->name = "bwdw_top_diff @ " + this->layer_param_.name(); - bwdw_top_diff_primitive = bwdw_top_diff->create_input(false); - - bwdd_weights_data.reset(new MKLDNNData(usr_weights_data_memory_pd, prv_bwdd_weights_data_memory_pd, this->blobs_[0].get(), this)); - bwdd_weights_data->name = "bwdd_weights_data @ " + this->layer_param_.name(); - bwdd_weights_data_primitive = bwdd_weights_data->create_input(false); - bwdw_weights_diff.reset(new MKLDNNDiff(usr_weights_data_memory_pd, prv_bwdw_weights_diff_memory_pd, this->blobs_[0].get(), this)); - bwdw_weights_diff->name = "bwdw_weights_diff @ " + this->layer_param_.name(); - 
bwdw_weights_diff_memory = bwdw_weights_diff->create_output_memory(); - - if (this->bias_term_) { - shared_ptr prv_bwdw_bias_diff_memory_pd(new MemPD(convBwdWeights_pd->diff_bias_primitive_desc())); - bwdw_bias_diff.reset(new MKLDNNDiff(usr_bias_data_memory_pd, prv_bwdw_bias_diff_memory_pd, this->blobs_[1].get(), this)); - bwdw_bias_diff->name = "bwdw_bias_diff @ " + this->layer_param_.name(); - bwdw_bias_diff_memory = bwdw_bias_diff->create_output_memory(); - - convBwdWeights.reset(new convolution_backward_weights(*convBwdWeights_pd - , *bwdw_bottom_data_primitive, *bwdw_top_diff_primitive - , *bwdw_weights_diff_memory, *bwdw_bias_diff_memory)); - - bwdw_bias_diff->set_mkldnn_primitive(convBwdWeights); - } else { - convBwdWeights.reset(new convolution_backward_weights(*convBwdWeights_pd - , *bwdw_bottom_data_primitive, *bwdw_top_diff_primitive - , *bwdw_weights_diff_memory)); - } - - convBwdData.reset(new convolution_backward_data(*convBwdData_pd - , *bwdd_top_diff_primitive, *bwdd_weights_data_primitive - , *bwdd_bottom_diff_memory)); - - bwdd_bottom_diff->set_mkldnn_primitive(convBwdData); - - bwdd_top_diff->set_mkldnn_primitive(convBwdData); //Wrong passed primitive! (TODO: Checking!) - //MKLDNNPrimitive bwdd_top_diff_primitive_transfer(bwdd_top_diff_primitive); - //bwdd_top_diff->set_mkldnn_primitive(bwdd_top_diff_primitive_transfer); - - //bwdd_weights_data->set_mkldnn_primitive(convBwdData); //Wrong passed primitive! (For sure!) - MKLDNNPrimitive bwdd_weights_data_primitive_transfer(bwdd_weights_data_primitive); - bwdd_weights_data->set_mkldnn_primitive(bwdd_weights_data_primitive_transfer); - - - bwdw_bottom_data->set_mkldnn_primitive(convBwdWeights); //Wrong passed primitive! (TODO: Checking!) - //MKLDNNPrimitive bwdw_bottom_data_primitive_transfer(bwdw_bottom_data_primitive); - //bwdw_bottom_data->set_mkldnn_primitive(bwdw_bottom_data_primitive_transfer); - - bwdw_top_diff->set_mkldnn_primitive(convBwdWeights); //Wrong passed primitive! (TODO: Checking!) 
- //MKLDNNPrimitive bwdw_top_diff_primitive_transfer(bwdw_top_diff_primitive); - //bwdw_top_diff->set_mkldnn_primitive(bwdw_top_diff_primitive_transfer); - - bwdw_weights_diff->set_mkldnn_primitive(convBwdWeights); - - // Names are for debugging purposes only. -} - - -template -void MKLDNNConvolutionLayer::Backward_cpu(const vector*>& top - , const vector& propagate_down - , const vector*>& bottom) -{ - VLOG(1) << "MKLDNNConvolutionLayer::Backward_cpu: " << this->layer_param_.name(); - if( convBwdData_pd == NULL) - InitConvolutionBwd(top, propagate_down, bottom); - if (propagate_down[0]) { - // making reorders if needed. - bwdd_top_diff->sync_before_read(); - bwdd_weights_data->sync_before_read(); - bwdd_bottom_diff->sync_before_write(); - - PERFORMANCE_EVENT_ID_INIT(perf_id_bw_, PERFORMANCE_MKLDNN_NAME("BW")); - PERFORMANCE_MEASUREMENT_BEGIN(); -#ifdef DEBUG +/* +All modification made by Intel Corporation: © 2016 Intel Corporation + +All contributions by the University of California: +Copyright (c) 2014, 2015, The Regents of the University of California (Regents) +All rights reserved. + +All other contributions: +Copyright (c) 2014, 2015, the respective contributors +All rights reserved. +For the list of contributors go to https://github.com/BVLC/caffe/blob/master/CONTRIBUTORS.md + + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef MKLDNN_SUPPORTED +#include +#include +#include + +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/layers/mkldnn_layers.hpp" +//#include "mkl_service.h" + +// TODO: Correct process case if there are no bias +// TODO: Exception handling - mkl-dnn produces exceptions on errors + +namespace caffe { + +template +MKLDNNConvolutionLayer::MKLDNNConvolutionLayer(const LayerParameter& param) + : MKLDNNLayer(), ConvolutionLayer(param) + , fwd_bottom_data(NULL), fwd_top_data(NULL), fwd_weights_data(NULL), fwd_bias_data(NULL) + , bwdd_weights_data(NULL), bwdw_bottom_data(NULL) + , bwdd_bottom_diff(NULL), bwdd_top_diff(NULL) + , bwdw_top_diff(NULL), bwdw_weights_diff(NULL), bwdw_bias_diff(NULL) + , convFwd_pd(NULL), convBwdData_pd(NULL), convBwdWeights_pd(NULL) + , fwd_top_data_memory(NULL), bwdd_bottom_diff_memory(NULL) + , bwdw_weights_diff_memory(NULL), bwdw_bias_diff_memory(NULL) + , fwd_bottom_data_primitive(NULL), fwd_weights_data_primitive(NULL), fwd_bias_data_primitive(NULL) + , bwdd_top_diff_primitive(NULL), bwdd_weights_data_primitive(NULL) + , bwdw_top_diff_primitive(NULL), bwdw_bottom_data_primitive(NULL) + , width_(0), height_(0), width_out_(0), height_out_(0), kernel_w_(0), 
kernel_h_(0) + , stride_w_(0), stride_h_(0), pad_w_(0), pad_h_(0) +{ + PERFORMANCE_EVENT_ID_RESET(perf_id_fw_); + PERFORMANCE_EVENT_ID_RESET(perf_id_bw_); + PERFORMANCE_EVENT_ID_RESET(perf_id_bw_weights_); +} + +template +void MKLDNNConvolutionLayer::compute_output_shape() +{ + ConvolutionLayer::compute_output_shape(); + this->height_out_ = (this->height_ + 2 * this->pad_h_ - this->kernel_h_) + / this->stride_h_ + 1; + this->width_out_ = (this->width_ + 2 * this->pad_w_ - this->kernel_w_) + / this->stride_w_ + 1; +} + +template +void MKLDNNConvolutionLayer::init_properties(const vector*>& bottom + , const vector*>& top) +{ + this->stride_w_ = this->stride_.cpu_data()[1]; + this->stride_h_ = this->stride_.cpu_data()[0]; + this->width_ = bottom[0]->width(); + this->height_ = bottom[0]->height(); + this->pad_w_ = this->pad_.cpu_data()[1]; + this->pad_h_ = this->pad_.cpu_data()[0]; + this->kernel_w_ = this->kernel_shape_.cpu_data()[1]; + this->kernel_h_ = this->kernel_shape_.cpu_data()[0]; + string _conv_algorithm = this->layer_param_.convolution_param().conv_algorithm(); + if(_conv_algorithm == "direct") + { + conv_algorithm = algorithm::convolution_direct; + } + else if(_conv_algorithm == "winograd") + { + conv_algorithm = algorithm::convolution_winograd; + } + else + { + LOG(ERROR) << "Unsupported convolution algorithm."; + CHECK(false); + } +} + +template +void MKLDNNConvolutionLayer::LayerSetUp(const vector*>& bottom + , const vector*>& top) +{ + VLOG(1) << "<< MKLDNNConvolutionLayer::LayerSetUp: " << this->layer_param_.name(); + ConvolutionLayer::LayerSetUp(bottom, top); + init_properties(bottom, top); + this->bottom_shape_ = &bottom[0]->shape(); +} + +template +void MKLDNNConvolutionLayer::Reshape(const vector*>& bottom + , const vector*>& top) +{ + VLOG(1) << " MKLDNNConvolutionLayer::Reshape: " << this->layer_param_.name(); + BaseConvolutionLayer::ReshapeForMKL(bottom, top); + init_properties(bottom, top); +} + +template +void 
MKLDNNConvolutionLayer::InitConvolutionFwd(const vector*>& bottom + , const vector*>& top) +{ + if (std::is_same::value) NOT_IMPLEMENTED; + auto propagation = this->phase_ == TEST ? prop_kind::forward_scoring : prop_kind::forward_training; + bool relu = this->layer_param_.convolution_param().relu(); + Dtype negative_slope = 0; + if(relu) + { + propagation = prop_kind::forward_inference; + negative_slope = this->layer_param_.relu_param().negative_slope(); + } + + int32_t g = std::max(this->group_, 1); + int32_t n = this->num_; + int32_t iw = this->width_; + int32_t ih = this->height_; + int32_t ic = this->channels_; + + int32_t ow = this->width_out_; + int32_t oh = this->height_out_; + int32_t oc = this->num_output_; + + int32_t kw = this->kernel_w_; + int32_t kh = this->kernel_h_; + + memory::dims convolutionStrides {this->stride_h_, this->stride_w_}; + memory::dims padding {this->pad_h_, this->pad_w_}; + + // ---- Initialize memory descriptors (fromat = any) to create convolution descriptor ------------- + memory::data_type mpcsn = memory::data_type::f32; + memory::format mfmt_any = memory::format::any; + + memory::dims bottom_tz = {n, ic, ih, iw}; + memory::dims bias_tz = {oc}; + memory::dims top_tz = {n, oc, oh, ow}; + memory::dims weights_tz = (g!= 1) ? 
memory::dims{g, oc/g, ic/g, kh, kw} : memory::dims{oc, ic, kh, kw}; + + // ---- Memory descriptors for initializing of convolution primitive descriptor ------------- + memory::desc init_bottom_md({bottom_tz}, mpcsn, mfmt_any); + memory::desc init_bias_md({bias_tz}, mpcsn, mfmt_any); + memory::desc init_top_md({top_tz}, mpcsn, mfmt_any); + memory::desc init_weights_md({weights_tz}, mpcsn, mfmt_any); + + // ---- Determining engine to use ----------------------- + std::string subengines = this->layer_param_.engine(); + if (subengines == "" || subengines == "MKLDNN") + subengines = "MKLDNN:CPU"; + EngineParser ep(subengines); + unsigned subEngineIndex = 0; + shared_ptr convReluFwd_pd; + mkldnn::algorithm eligibleAlgorithms[2] = {conv_algorithm, algorithm::convolution_direct}; + for (auto &convAlgorithm : eligibleAlgorithms) { + // ---- Initialize convolution primitive descriptor ------------- + shared_ptr convFwd_desc; + if (this->bias_term_) { + convFwd_desc.reset(new convolution_forward::desc(propagation, convAlgorithm + , init_bottom_md, init_weights_md, init_bias_md, init_top_md + , convolutionStrides, padding, padding, padding_kind::zero)); + } else { + convFwd_desc.reset(new convolution_forward::desc(propagation, convAlgorithm + , init_bottom_md, init_weights_md, init_top_md + , convolutionStrides, padding, padding, padding_kind::zero)); + } + shared_ptr convReluFwd_desc; + if(relu) convReluFwd_desc.reset(new convolution_relu_forward::desc(*convFwd_desc, negative_slope)); + + try { + for(subEngineIndex=0; subEngineIndex < ep.getNumberOfSubEngines(); subEngineIndex++) { + try { + convFwd_pd.reset(new convolution_forward::primitive_desc(*convFwd_desc, + ep.getMKLDNNSubEngine(subEngineIndex))); + if(relu) convReluFwd_pd.reset(new convolution_relu_forward::primitive_desc(*convReluFwd_desc, + ep.getMKLDNNSubEngine(subEngineIndex))); + } + catch(...) { + continue; + } + break; + } + if ((!convFwd_pd) || (relu && !convReluFwd_pd)) + break; + } + catch(...) 
{ + continue; + } + break; + } + + CHECK(convFwd_pd); + if (relu) CHECK(convReluFwd_pd); + engine cpu_engine = CpuEngine::Instance().get_engine(); + + // ---- Create priv memory primitive descriptors stored as class members ------------- + typedef typename memory::primitive_desc MemPD; // short name for memory::primitive_desc + + shared_ptr prv_fwd_bottom_data_memory_pd(new MemPD(convFwd_pd->src_primitive_desc())); + shared_ptr prv_fwd_top_data_memory_pd(new MemPD(convFwd_pd->dst_primitive_desc())); + shared_ptr prv_fwd_weights_data_memory_pd(new MemPD(convFwd_pd->weights_primitive_desc())); + + // ---- Create usr memory primitive descriptors ------------- + memory::format mfmt_nchw = memory::format::nchw; + memory::format weights_mfmt = (g!= 1) ? memory::format::goihw : memory::format::oihw; + + // TODO: There should not be a problem to use this for Backward as well + shared_ptr usr_bottom_data_memory_pd(new MemPD({{bottom_tz}, mpcsn, mfmt_nchw}, cpu_engine)); + shared_ptr usr_bias_data_memory_pd(new MemPD({{bias_tz}, mpcsn, memory::format::x}, cpu_engine)); + shared_ptr usr_top_data_memory_pd(new MemPD({{top_tz}, mpcsn, mfmt_nchw}, cpu_engine)); + shared_ptr usr_weights_data_memory_pd(new MemPD({{weights_tz}, mpcsn, weights_mfmt}, cpu_engine)); + + + // --- init primitive and prv_memory descriptors ---------------------- + fwd_bottom_data.reset(new MKLDNNData(usr_bottom_data_memory_pd, prv_fwd_bottom_data_memory_pd, bottom[0], this)); + fwd_bottom_data ->name = "fwd_bottom_data @ " + this->layer_param_.name(); + fwd_bottom_data_primitive = fwd_bottom_data->create_input(false); + + fwd_top_data.reset(new MKLDNNData(usr_top_data_memory_pd, prv_fwd_top_data_memory_pd, top[0], this)); + fwd_top_data ->name = "fwd_top_data @ " + this->layer_param_.name(); + fwd_top_data_memory = fwd_top_data->create_output_memory(); + + fwd_weights_data.reset(new MKLDNNData(usr_weights_data_memory_pd, prv_fwd_weights_data_memory_pd, this->blobs_[0].get(), this)); + 
fwd_weights_data->name = "fwd_weights_data @ " + this->layer_param_.name(); + fwd_weights_data_primitive = fwd_weights_data->create_input(true); + + if (this->bias_term_) { + shared_ptr prv_fwd_bias_data_memory_pd(new MemPD(convFwd_pd->bias_primitive_desc())); + fwd_bias_data.reset(new MKLDNNData(usr_bias_data_memory_pd, prv_fwd_bias_data_memory_pd, this->blobs_[1].get(), this)); + fwd_bias_data->name = "fwd_bias_data @ " + this->layer_param_.name(); + fwd_bias_data_primitive = fwd_bias_data->create_input(true); + if(relu) { + convFwd.reset(new convolution_relu_forward(*convReluFwd_pd + , *fwd_bottom_data_primitive, *fwd_weights_data_primitive + , *fwd_bias_data_primitive, *fwd_top_data_memory)); + } else { + convFwd.reset(new convolution_forward(*convFwd_pd + , *fwd_bottom_data_primitive, *fwd_weights_data_primitive + , *fwd_bias_data_primitive, *fwd_top_data_memory)); + } + fwd_bias_data->set_mkldnn_primitive(convFwd); + } else { + if(relu) { + convFwd.reset(new convolution_relu_forward(*convReluFwd_pd + , *fwd_bottom_data_primitive, *fwd_weights_data_primitive + , *fwd_top_data_memory)); + } else { + convFwd.reset(new convolution_forward(*convFwd_pd + , *fwd_bottom_data_primitive, *fwd_weights_data_primitive + , *fwd_top_data_memory)); + } + } + fwd_bottom_data->set_mkldnn_primitive(convFwd); //Wrong passed primitive! (TODO: Checking!) + //MKLDNNPrimitive fwd_bottom_data_primitive_transfer(fwd_bottom_data_primitive); + //fwd_bottom_data->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); + + fwd_top_data->set_mkldnn_primitive(convFwd); + + //fwd_weights_data->set_mkldnn_primitive(convFwd); //Wrong passed primitive! (For sure!) + MKLDNNPrimitive fwd_weights_data_primitive_transfer(fwd_weights_data_primitive); + fwd_weights_data->set_mkldnn_primitive(fwd_weights_data_primitive_transfer); + + // Names are for debugging purposes only. 
+} + +template +void MKLDNNConvolutionLayer::Forward_cpu(const vector*>& bottom + , const vector*>& top) +{ + VLOG(1) << "MKLDNNConvolutionLayer::Forward_cpu: " << this->layer_param_.name(); + if( convFwd_pd == NULL) + InitConvolutionFwd(bottom, top); + // making reorders if needed. + fwd_bottom_data->sync_before_read(); + fwd_weights_data->sync_before_read(); + if (this->bias_term_) + fwd_bias_data->sync_before_read(); + // update top that head at prv + fwd_top_data->sync_before_write(); + + PERFORMANCE_EVENT_ID_INIT(perf_id_fw_, PERFORMANCE_MKLDNN_NAME("FW")); + PERFORMANCE_MEASUREMENT_BEGIN(); + convFwd.submit(); + PERFORMANCE_MEASUREMENT_END_ID(perf_id_fw_); +} + + +template +void MKLDNNConvolutionLayer::InitConvolutionBwd(const vector*>& top + , const vector& propagate_down + , const vector*>& bottom) +{ + if (std::is_same::value) NOT_IMPLEMENTED; + + int32_t g = std::max(this->group_, 1); + int32_t n = this->num_; + int32_t iw = this->width_; + int32_t ih = this->height_; + int32_t ic = this->channels_; + + int32_t ow = this->width_out_; + int32_t oh = this->height_out_; + int32_t oc = this->num_output_; + + int32_t kw = this->kernel_w_; + int32_t kh = this->kernel_h_; + + memory::dims convolutionStrides {this->stride_h_, this->stride_w_}; + memory::dims padding {this->pad_h_, this->pad_w_}; + + // ---- Initialize memory descriptors (fromat = any) to create convolution descriptor ------------- + memory::data_type mpcsn = memory::data_type::f32; + memory::format mfmt_any = memory::format::any; + + memory::dims bottom_tz = {n, ic, ih, iw}; + memory::dims bias_tz = {oc}; + memory::dims top_tz = {n, oc, oh, ow}; + memory::dims weights_tz = ( g!= 1) ? 
memory::dims{g, oc/g, ic/g, kh, kw} : memory::dims{oc, ic, kh, kw}; + + // ---- Memory descriptors for initializing of convolution primitive descriptor ------------- + memory::desc init_bottom_md({bottom_tz}, mpcsn, mfmt_any); + memory::desc init_bias_md({bias_tz}, mpcsn, mfmt_any); + memory::desc init_top_md({top_tz}, mpcsn, mfmt_any); + memory::desc init_weights_md({weights_tz}, mpcsn, mfmt_any); + + // ---- Determining engine to use ----------------------- + std::string subengines = this->layer_param_.engine(); + if (subengines == "" || subengines == "MKLDNN") + subengines = "MKLDNN:CPU"; + EngineParser ep(subengines); + unsigned subEngineIndex = 0; + + auto eligibleAlgorithms = {conv_algorithm, algorithm::convolution_direct}; + for (auto &convAlgorithm : eligibleAlgorithms) { + // ---- Initialize convolution primitive descriptor ------------- + shared_ptr convBwdData_desc; + shared_ptr convBwdWeights_desc; + if (this->bias_term_) { + convBwdWeights_desc.reset(new convolution_backward_weights::desc(convAlgorithm + , init_bottom_md, init_weights_md, init_bias_md, init_top_md + , convolutionStrides, padding, padding, padding_kind::zero)); + } else { + convBwdWeights_desc.reset(new convolution_backward_weights::desc(convAlgorithm + , init_bottom_md, init_weights_md, init_top_md + , convolutionStrides, padding, padding, padding_kind::zero)); + } + + convBwdData_desc.reset(new convolution_backward_data::desc(convAlgorithm + , init_bottom_md, init_weights_md, init_top_md + , convolutionStrides, padding, padding, padding_kind::zero)); + + try { + for(subEngineIndex=0; subEngineIndex < ep.getNumberOfSubEngines(); subEngineIndex++) { + try { + convBwdData_pd.reset(new convolution_backward_data::primitive_desc(*convBwdData_desc, + ep.getMKLDNNSubEngine(subEngineIndex), *convFwd_pd)); + + convBwdWeights_pd.reset(new convolution_backward_weights::primitive_desc(*convBwdWeights_desc, + ep.getMKLDNNSubEngine(subEngineIndex), *convFwd_pd)); + } + catch(...) 
{ + continue; + } + break; + } + if (!convBwdData_pd || !convBwdWeights_pd) + break; + } + catch(...) { + continue; + } + break; + } + + CHECK(convBwdData_pd); + CHECK(convBwdWeights_pd); + engine cpu_engine = CpuEngine::Instance().get_engine(); + + // ---- Create priv memory primitive descriptors stored as class members ------------- + typedef typename memory::primitive_desc MemPD; // short name for memory::primitive_desc + + shared_ptr prv_bwdd_bottom_diff_memory_pd(new MemPD(convBwdData_pd->diff_src_primitive_desc())); + shared_ptr prv_bwdd_top_diff_memory_pd(new MemPD(convBwdData_pd->diff_dst_primitive_desc())); + shared_ptr prv_bwdd_weights_data_memory_pd(new MemPD(convBwdData_pd->weights_primitive_desc())); + + shared_ptr prv_bwdw_bottom_data_memory_pd(new MemPD(convBwdWeights_pd->src_primitive_desc())); + shared_ptr prv_bwdw_top_diff_memory_pd(new MemPD(convBwdWeights_pd->diff_dst_primitive_desc())); + shared_ptr prv_bwdw_weights_diff_memory_pd(new MemPD(convBwdWeights_pd->diff_weights_primitive_desc())); + + // ---- Create usr memory primitive descriptors ------------- + memory::format mfmt_nchw = memory::format::nchw; + memory::format weights_mfmt = ( g!= 1) ? memory::format::goihw : memory::format::oihw; + + // ???!!! can we use usr memory primitive descrittors for backward?? 
+ shared_ptr usr_bottom_data_memory_pd(new MemPD({{bottom_tz}, mpcsn, mfmt_nchw}, cpu_engine)); + shared_ptr usr_bias_data_memory_pd(new MemPD({{bias_tz}, mpcsn, memory::format::x}, cpu_engine)); + shared_ptr usr_top_data_memory_pd(new MemPD({{top_tz}, mpcsn, mfmt_nchw}, cpu_engine)); + shared_ptr usr_weights_data_memory_pd(new MemPD({{weights_tz}, mpcsn, weights_mfmt}, cpu_engine)); + + + // --- init primitive and prv_memory descriptors ---------------------- + bwdd_bottom_diff.reset(new MKLDNNDiff(usr_bottom_data_memory_pd, prv_bwdd_bottom_diff_memory_pd, bottom[0], this)); + bwdd_bottom_diff ->name = "bwdd_bottom_diff @ " + this->layer_param_.name(); + bwdd_bottom_diff_memory = bwdd_bottom_diff->create_output_memory(); + bwdw_bottom_data.reset(new MKLDNNData(usr_bottom_data_memory_pd, prv_bwdw_bottom_data_memory_pd, bottom[0], this)); + bwdw_bottom_data ->name = "bwdw_bottom_data @ " + this->layer_param_.name(); + bwdw_bottom_data_primitive = bwdw_bottom_data->create_input(false); + + bwdd_top_diff.reset(new MKLDNNDiff(usr_top_data_memory_pd, prv_bwdd_top_diff_memory_pd, top[0], this)); + bwdd_top_diff ->name = "bwdd_top_diff @ " + this->layer_param_.name(); + bwdd_top_diff_primitive = bwdd_top_diff->create_input(false); + bwdw_top_diff.reset(new MKLDNNDiff(usr_top_data_memory_pd, prv_bwdw_top_diff_memory_pd, top[0], this)); + bwdw_top_diff ->name = "bwdw_top_diff @ " + this->layer_param_.name(); + bwdw_top_diff_primitive = bwdw_top_diff->create_input(false); + + bwdd_weights_data.reset(new MKLDNNData(usr_weights_data_memory_pd, prv_bwdd_weights_data_memory_pd, this->blobs_[0].get(), this)); + bwdd_weights_data->name = "bwdd_weights_data @ " + this->layer_param_.name(); + bwdd_weights_data_primitive = bwdd_weights_data->create_input(false); + bwdw_weights_diff.reset(new MKLDNNDiff(usr_weights_data_memory_pd, prv_bwdw_weights_diff_memory_pd, this->blobs_[0].get(), this)); + bwdw_weights_diff->name = "bwdw_weights_diff @ " + this->layer_param_.name(); + 
bwdw_weights_diff_memory = bwdw_weights_diff->create_output_memory(); + + if (this->bias_term_) { + shared_ptr prv_bwdw_bias_diff_memory_pd(new MemPD(convBwdWeights_pd->diff_bias_primitive_desc())); + bwdw_bias_diff.reset(new MKLDNNDiff(usr_bias_data_memory_pd, prv_bwdw_bias_diff_memory_pd, this->blobs_[1].get(), this)); + bwdw_bias_diff->name = "bwdw_bias_diff @ " + this->layer_param_.name(); + bwdw_bias_diff_memory = bwdw_bias_diff->create_output_memory(); + + convBwdWeights.reset(new convolution_backward_weights(*convBwdWeights_pd + , *bwdw_bottom_data_primitive, *bwdw_top_diff_primitive + , *bwdw_weights_diff_memory, *bwdw_bias_diff_memory)); + + bwdw_bias_diff->set_mkldnn_primitive(convBwdWeights); + } else { + convBwdWeights.reset(new convolution_backward_weights(*convBwdWeights_pd + , *bwdw_bottom_data_primitive, *bwdw_top_diff_primitive + , *bwdw_weights_diff_memory)); + } + + convBwdData.reset(new convolution_backward_data(*convBwdData_pd + , *bwdd_top_diff_primitive, *bwdd_weights_data_primitive + , *bwdd_bottom_diff_memory)); + + bwdd_bottom_diff->set_mkldnn_primitive(convBwdData); + + bwdd_top_diff->set_mkldnn_primitive(convBwdData); //Wrong passed primitive! (TODO: Checking!) + //MKLDNNPrimitive bwdd_top_diff_primitive_transfer(bwdd_top_diff_primitive); + //bwdd_top_diff->set_mkldnn_primitive(bwdd_top_diff_primitive_transfer); + + //bwdd_weights_data->set_mkldnn_primitive(convBwdData); //Wrong passed primitive! (For sure!) + MKLDNNPrimitive bwdd_weights_data_primitive_transfer(bwdd_weights_data_primitive); + bwdd_weights_data->set_mkldnn_primitive(bwdd_weights_data_primitive_transfer); + + + bwdw_bottom_data->set_mkldnn_primitive(convBwdWeights); //Wrong passed primitive! (TODO: Checking!) + //MKLDNNPrimitive bwdw_bottom_data_primitive_transfer(bwdw_bottom_data_primitive); + //bwdw_bottom_data->set_mkldnn_primitive(bwdw_bottom_data_primitive_transfer); + + bwdw_top_diff->set_mkldnn_primitive(convBwdWeights); //Wrong passed primitive! (TODO: Checking!) 
+ //MKLDNNPrimitive bwdw_top_diff_primitive_transfer(bwdw_top_diff_primitive); + //bwdw_top_diff->set_mkldnn_primitive(bwdw_top_diff_primitive_transfer); + + bwdw_weights_diff->set_mkldnn_primitive(convBwdWeights); + + // Names are for debugging purposes only. +} + + +template +void MKLDNNConvolutionLayer::Backward_cpu(const vector*>& top + , const vector& propagate_down + , const vector*>& bottom) +{ + VLOG(1) << "MKLDNNConvolutionLayer::Backward_cpu: " << this->layer_param_.name(); + if( convBwdData_pd == NULL) + InitConvolutionBwd(top, propagate_down, bottom); + if (propagate_down[0]) { + // making reorders if needed. + bwdd_top_diff->sync_before_read(); + bwdd_weights_data->sync_before_read(); + bwdd_bottom_diff->sync_before_write(); + + PERFORMANCE_EVENT_ID_INIT(perf_id_bw_, PERFORMANCE_MKLDNN_NAME("BW")); + PERFORMANCE_MEASUREMENT_BEGIN(); +#ifdef DEBUG if (bottom[0]->prv_data() != NULL) { LOG(INFO) << "Debug: Bottom prv data: " << *bottom[0]->prv_data(); @@ -489,9 +529,9 @@ void MKLDNNConvolutionLayer::Backward_cpu(const vector*>& top else { LOG(INFO) << "Debug: Top prv diff is NULL!"; - LOG(INFO) << "Debug: Top cpu diff: " << *top[0]->cpu_diff(); - } - + LOG(INFO) << "Debug: Top cpu diff: " << *top[0]->cpu_diff(); + } + if (this->blobs_[0]->prv_data() != NULL) { LOG(INFO) << "Debug: Weights prv data from blobs_[0]: " << *this->blobs_[0]->prv_data(); @@ -499,64 +539,64 @@ void MKLDNNConvolutionLayer::Backward_cpu(const vector*>& top else { LOG(INFO) << "Debug: Weights prv data is NULL!"; - LOG(INFO) << "Debug: Weights cpu data: " << *this->blobs_[0]->cpu_data(); - } - //Before submit, so get_prv_ptr() always has the value - LOG(INFO) << "Debug: Weights prv data from get_prv_ptr: " << *bwdd_weights_data->get_prv_ptr(); -#endif - convBwdData.submit(); -#ifdef DEBUG + LOG(INFO) << "Debug: Weights cpu data: " << *this->blobs_[0]->cpu_data(); + } + //Before submit, so get_prv_ptr() always has the value + LOG(INFO) << "Debug: Weights prv data from get_prv_ptr: " 
<< *bwdd_weights_data->get_prv_ptr(); +#endif + convBwdData.submit(); +#ifdef DEBUG if (bottom[0]->prv_diff() != NULL) { LOG(INFO) << "Debug: Bottom prv diff: " << *bottom[0]->prv_diff(); } else { - LOG(INFO) << "Debug: Bottom prv diff is NULL!"; - LOG(INFO) << "Debug: Bottom cpu diff: " << *bottom[0]->cpu_diff(); - } -#endif - PERFORMANCE_MEASUREMENT_END_ID(perf_id_bw_); - } - if (this->param_propagate_down(0)) { - // making reorders if needed. - bwdw_top_diff->sync_before_read(); - bwdw_bottom_data->sync_before_read(); - // update top that head at prv - bwdw_weights_diff->sync_before_write(); - if (this->param_propagate_down(1)) { - CHECK(bwdw_bias_diff); - bwdw_bias_diff->sync_before_write(); - } - PERFORMANCE_EVENT_ID_INIT(perf_id_bw_weights_, - PERFORMANCE_MKLDNN_NAME_DETAILED("BW", "_weights")); - PERFORMANCE_MEASUREMENT_BEGIN(); - convBwdWeights.submit(); - PERFORMANCE_MEASUREMENT_END_ID(perf_id_bw_weights_); - } -} - -#ifdef CPU_ONLY -STUB_GPU(MKLDNNConvolutionLayer); -#else - -template -void MKLDNNConvolutionLayer::Forward_gpu(const vector*>& bottom - , const vector*>& top) -{ - NOT_IMPLEMENTED; -} - -template -void MKLDNNConvolutionLayer::Backward_gpu(const vector*>& top - , const vector& propagate_down - , const vector*>& bottom) -{ - NOT_IMPLEMENTED; -} -#endif - -INSTANTIATE_CLASS(MKLDNNConvolutionLayer); - -} // namespace caffe -#endif // #ifdef MKLDNN_SUPPORTED + LOG(INFO) << "Debug: Bottom prv diff is NULL!"; + LOG(INFO) << "Debug: Bottom cpu diff: " << *bottom[0]->cpu_diff(); + } +#endif + PERFORMANCE_MEASUREMENT_END_ID(perf_id_bw_); + } + if (this->param_propagate_down(0)) { + // making reorders if needed. 
+ bwdw_top_diff->sync_before_read(); + bwdw_bottom_data->sync_before_read(); + // update top that head at prv + bwdw_weights_diff->sync_before_write(); + if (this->param_propagate_down(1)) { + CHECK(bwdw_bias_diff); + bwdw_bias_diff->sync_before_write(); + } + PERFORMANCE_EVENT_ID_INIT(perf_id_bw_weights_, + PERFORMANCE_MKLDNN_NAME_DETAILED("BW", "_weights")); + PERFORMANCE_MEASUREMENT_BEGIN(); + convBwdWeights.submit(); + PERFORMANCE_MEASUREMENT_END_ID(perf_id_bw_weights_); + } +} + +#ifdef CPU_ONLY +STUB_GPU(MKLDNNConvolutionLayer); +#else + +template +void MKLDNNConvolutionLayer::Forward_gpu(const vector*>& bottom + , const vector*>& top) +{ + NOT_IMPLEMENTED; +} + +template +void MKLDNNConvolutionLayer::Backward_gpu(const vector*>& top + , const vector& propagate_down + , const vector*>& bottom) +{ + NOT_IMPLEMENTED; +} +#endif + +INSTANTIATE_CLASS(MKLDNNConvolutionLayer); + +} // namespace caffe +#endif // #ifdef MKLDNN_SUPPORTED diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 9f9b0443e..ab900378f 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -980,6 +980,7 @@ message ConvolutionParameter { optional bool force_nd_im2col = 17 [default = false]; optional bool relu = 19 [default = false]; optional float negative_slope = 20 [default = 0]; + optional string conv_algorithm = 21 [default = "direct"]; } message CropParameter { From da63fb383dab752a95cafd34a6c5288a3cad5303 Mon Sep 17 00:00:00 2001 From: linxinan Date: Fri, 23 Jun 2017 22:18:04 +0800 Subject: [PATCH 28/34] Add mkl_deconvolution layer and corresponding unitest Change-Id: I1c56fb598b6e517ef85b7177d6fe81d3f17b851b --- include/caffe/layers/mkl_layers.hpp | 76 +++ src/caffe/layer_factory.cpp | 52 ++ src/caffe/layers/deconv_layer.cpp | 1 - src/caffe/layers/mkl_deconvolution_layer.cpp | 630 ++++++++++++++++++ .../test/test_mkl_deconvolution_layer.cpp | 419 ++++++++++++ 5 files changed, 1177 insertions(+), 1 deletion(-) create mode 100644 
src/caffe/layers/mkl_deconvolution_layer.cpp create mode 100644 src/caffe/test/test_mkl_deconvolution_layer.cpp diff --git a/include/caffe/layers/mkl_layers.hpp b/include/caffe/layers/mkl_layers.hpp index 4f714d406..0d5d66416 100644 --- a/include/caffe/layers/mkl_layers.hpp +++ b/include/caffe/layers/mkl_layers.hpp @@ -46,6 +46,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "caffe/common.hpp" #include "caffe/layers/base_conv_layer.hpp" #include "caffe/layers/conv_layer.hpp" +#include "caffe/layers/deconv_layer.hpp" #include "caffe/layers/neuron_layer.hpp" #include "caffe/proto/caffe.pb.h" @@ -133,7 +134,82 @@ class MKLConvolutionLayer : public ConvolutionLayer { PERFORMANCE_EVENT_ID_DECL(perf_id_bw_bias_); }; +template +class MKLDeconvolutionLayer : public DeconvolutionLayer { + public: + explicit MKLDeconvolutionLayer(const LayerParameter& param); + + virtual ~MKLDeconvolutionLayer(); + + virtual inline const char* type() const { return "MklDeconvolution"; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom); + // Customized methods + void Init(const vector*>& bottom, + const vector*>& top); + + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void compute_output_shape(); + + void Reshape(const vector*>& bottom, + const vector*>& top); + + private: + /* Fwd step */ + shared_ptr > fwd_bottom_data, fwd_top_data, fwd_filter_data, + fwd_bias_data; + dnnPrimitive_t convolutionFwd; + /* Bwd data step */ + shared_ptr > bwdd_top_diff, bwdd_bottom_diff; + shared_ptr > bwdd_filter_data; + dnnPrimitive_t convolutionBwdData; + + /* Bwd filter step */ + shared_ptr > bwdf_top_diff, 
bwdf_filter_diff; + shared_ptr > bwdf2fwd_filter_diff; + shared_ptr > bwdf_bottom_data; + dnnPrimitive_t convolutionBwdFilter; + + /* Bwd bias step */ + shared_ptr > bwdb_top_diff, bwdb_bias_diff; + dnnPrimitive_t convolutionBwdBias; + + /* In case of (iter_size > 1) we need additional buffers */ + shared_ptr > bwdf_filter_diff_iter, + bwdb_bias_diff_iter; + + // TODO: temp. compatibility vs. older cafe + size_t width_, + height_, + width_out_, + height_out_, + kernel_w_, + kernel_h_, + stride_w_, + stride_h_; + int pad_w_, + pad_h_; + + bool bprop_unpack_called; + + PERFORMANCE_EVENT_ID_DECL(perf_id_fw_); + PERFORMANCE_EVENT_ID_DECL(perf_id_bw_); + PERFORMANCE_EVENT_ID_DECL(perf_id_bw_prop_); + PERFORMANCE_EVENT_ID_DECL(perf_id_bw_diff_); + PERFORMANCE_EVENT_ID_DECL(perf_id_bw_bias_); +}; /** * @brief Normalize the input in a local region across feature maps. */ diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp index fd179a66c..2b52007cc 100644 --- a/src/caffe/layer_factory.cpp +++ b/src/caffe/layer_factory.cpp @@ -162,6 +162,58 @@ shared_ptr > GetConvolutionLayer( REGISTER_LAYER_CREATOR(Convolution, GetConvolutionLayer); +// Get deconvolution layer according to engine. 
+template +shared_ptr > GetDeconvolutionLayer( + const LayerParameter& param) { + ConvolutionParameter conv_param = param.convolution_param(); + ConvolutionParameter_Engine engine = conv_param.engine(); + +#if defined(MKL2017_SUPPORTED) + bool use_dilation = false; + for (int i = 0; i < conv_param.dilation_size(); ++i) { + if (conv_param.dilation(i) > 1) { + use_dilation = true; + } + } +#endif + + // New, more flexible way of providing engine + if (engine == ConvolutionParameter_Engine_DEFAULT && param.engine() != "") { + EngineParser ep(param.engine()); + + if (ep.isEngine("CAFFE")) { + engine = ConvolutionParameter_Engine_CAFFE; + } +#ifdef MKL2017_SUPPORTED + else if (!use_dilation && ep.isEngine("MKL2017")) { + engine = ConvolutionParameter_Engine_MKL2017; + } +#endif + + } + + if (engine == ConvolutionParameter_Engine_DEFAULT) { + engine = ConvolutionParameter_Engine_CAFFE; + } + if (engine == ConvolutionParameter_Engine_CAFFE) { + return shared_ptr >(new DeconvolutionLayer(param)); +#ifdef MKL2017_SUPPORTED + } else if (engine == ConvolutionParameter_Engine_MKL2017) { + if (use_dilation) { + LOG(FATAL) << "MKL2017 doesn't support the dilated convolution at Layer " + << param.name(); + } + return shared_ptr >(new MKLDeconvolutionLayer(param)); +#endif + } else { + LOG(FATAL) << "Layer " << param.name() << " has unknown engine."; + } + return shared_ptr >(); +} + +REGISTER_LAYER_CREATOR(Deconvolution, GetDeconvolutionLayer); + // Get inner_product layer according to engine. 
template shared_ptr > GetInnerProductLayer( diff --git a/src/caffe/layers/deconv_layer.cpp b/src/caffe/layers/deconv_layer.cpp index 87e8df7ed..bc6dca4a4 100644 --- a/src/caffe/layers/deconv_layer.cpp +++ b/src/caffe/layers/deconv_layer.cpp @@ -147,6 +147,5 @@ STUB_GPU(DeconvolutionLayer); #endif INSTANTIATE_CLASS(DeconvolutionLayer); -REGISTER_LAYER_CLASS(Deconvolution); } // namespace caffe diff --git a/src/caffe/layers/mkl_deconvolution_layer.cpp b/src/caffe/layers/mkl_deconvolution_layer.cpp new file mode 100644 index 000000000..779eee71c --- /dev/null +++ b/src/caffe/layers/mkl_deconvolution_layer.cpp @@ -0,0 +1,630 @@ +/* +All modification made by Intel Corporation: © 2016 Intel Corporation + +All contributions by the University of California: +Copyright (c) 2014, 2015, The Regents of the University of California (Regents) +All rights reserved. + +All other contributions: +Copyright (c) 2014, 2015, the respective contributors +All rights reserved. +For the list of contributors go to https://github.com/BVLC/caffe/blob/master/CONTRIBUTORS.md + + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef MKL2017_SUPPORTED +#include +#include +#include + +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/layers/mkl_layers.hpp" +#include "caffe/util/performance.hpp" +#include "mkl_service.h" +#ifdef _OPENMP +#include +#endif + + +static int getMKLBuildDate() { + static int build = 0; + if (build == 0) { + MKLVersion v; + mkl_get_version(&v); + build = atoi(v.Build); + } + return build; +} + +namespace caffe { +template +MKLDeconvolutionLayer::MKLDeconvolutionLayer( + const LayerParameter& param) + : DeconvolutionLayer(param), + fwd_bottom_data(new MKLData()), + fwd_top_data(new MKLData()), + fwd_filter_data(new MKLData()), + fwd_bias_data(new MKLData()), + convolutionFwd(NULL), + bwdd_top_diff(new MKLDiff()), + bwdd_bottom_diff(new MKLDiff()), + bwdd_filter_data(new MKLData()), + convolutionBwdData(static_cast(NULL)), + bwdf_top_diff(new MKLDiff()), + bwdf_filter_diff(new MKLDiff()), + bwdf2fwd_filter_diff(new MKLDiff()), + bwdf_bottom_data(new MKLData()), + convolutionBwdFilter(static_cast(NULL)), + bwdb_top_diff(new MKLDiff()), + bwdb_bias_diff(new MKLDiff()), + convolutionBwdBias(static_cast(NULL)), + bwdf_filter_diff_iter(new MKLDiff()), + bwdb_bias_diff_iter(new MKLDiff()) { + PERFORMANCE_EVENT_ID_RESET(perf_id_fw_); + PERFORMANCE_EVENT_ID_RESET(perf_id_bw_); + PERFORMANCE_EVENT_ID_RESET(perf_id_bw_prop_); + PERFORMANCE_EVENT_ID_RESET(perf_id_bw_diff_); + PERFORMANCE_EVENT_ID_RESET(perf_id_bw_bias_); + } + +template 
+void MKLDeconvolutionLayer::compute_output_shape() { + DeconvolutionLayer::compute_output_shape(); + this->height_out_ = this->stride_h_ * (this->height_ - 1) + + this->kernel_h_ - 2 * this->pad_h_ ; + this->width_out_ = this->stride_w_ * (this->width_ - 1) + + this->kernel_w_ - 2 * this->pad_w_ ; +} + +template +MKLDeconvolutionLayer::~MKLDeconvolutionLayer() { + dnnDelete(convolutionFwd); + dnnDelete(convolutionBwdData); + dnnDelete(convolutionBwdFilter); + if (this->bias_term_) + dnnDelete(convolutionBwdBias); +} + +template +void MKLDeconvolutionLayer::Init( + const vector*>& bottom, + const vector*>& top) { + +#ifdef _OPENMP + this->num_of_threads_ = omp_get_max_threads() < bottom[0]->shape(0) ? + omp_get_max_threads() : bottom[0]->shape(0); + if (this->num_of_threads_ < 1) { + LOG(WARNING) << "DeConv layer: omp_get_max_threads() =" + << this->num_of_threads_; + this->num_of_threads_ = 1; + } +#endif + + + this->width_ = bottom[0]->width(); + this->height_ = bottom[0]->height(); + this->num_ = bottom[0]->num(); + + // TODO: clean up this + kernel_w_ = this->kernel_shape_.cpu_data()[1]; + kernel_h_ = this->kernel_shape_.cpu_data()[0]; + stride_w_ = this->stride_.cpu_data()[1]; + stride_h_ = this->stride_.cpu_data()[0]; + pad_w_ = this->pad_.cpu_data()[1]; + pad_h_ = this->pad_.cpu_data()[0]; + + this->bottom_shape_ = &bottom[0]->shape(); + compute_output_shape(); + int status; + size_t n, g; + size_t iw, ih, ic; + size_t ow, oh, oc; + size_t kw, kh; /* filter */ + size_t dimension = 4; + + g = std::max(this->group_, 1); + n = this->num_; + iw = this->width_; + ih = this->height_; + ic = this->channels_; + + ow = this->width_out_; + oh = this->height_out_; + oc = this->num_output_; + + kw = this->kernel_w_; + kh = this->kernel_h_; + + size_t bdata_sizes[4] = {iw, ih, ic, n}; + size_t bdata_strides[4] = {1, iw, iw*ih, iw*ih*ic}; + + /* starting with MKL 2017 Gold in case of groups filter layout + * becomes 5D, i.e. 
groups become a separate dimension */ + size_t g_mkl2017 = g; + size_t f_dimension = dimension + (g != 1); + if (getMKLBuildDate() < 20160701) { + g_mkl2017 = 1; + f_dimension = dimension; + } + + size_t fdata_sizes[5] = {kw, kh, oc/g, ic/g_mkl2017, g_mkl2017}; + size_t fdata_strides[5] = {1, kw, kw*kh, kw*kh*oc/g, kw*kh*ic/g*oc/g}; + + size_t bias_sizes[1] = {oc}; + size_t bias_strides[1] = {1}; + + size_t tdata_sizes[4] = {ow, oh, oc, n}; + size_t tdata_strides[4] = {1, ow, ow*oh, ow*oh*oc}; + + size_t convolutionStrides[2] = {this->stride_w_, this->stride_h_}; + int inputOffset[2] = {-this->pad_w_, -this->pad_h_}; + + // Names are for debugging purposes only. + fwd_bottom_data ->name = "fwd_bottom_data @ " + this->layer_param_.name(); + fwd_top_data ->name = "fwd_top_data @ " + this->layer_param_.name(); + fwd_filter_data ->name = "fwd_filter_data @ " + this->layer_param_.name(); + fwd_bias_data ->name = "fwd_bias_data @ " + this->layer_param_.name(); + bwdd_top_diff ->name = "bwdd_top_diff @ " + this->layer_param_.name(); + bwdd_bottom_diff->name = "bwdd_bottom_diff @ " + this->layer_param_.name(); + bwdd_filter_data->name = "bwdd_filter_data @ " + this->layer_param_.name(); + bwdf_top_diff ->name = "bwdf_top_diff @ " + this->layer_param_.name(); + bwdf_bottom_data->name = "bwdf_bottom_data @ " + this->layer_param_.name(); + bwdf_filter_diff->name = "bwdf_filter_diff @ " + this->layer_param_.name(); + bwdf2fwd_filter_diff->name = + "bwdf2fwd_filter_diff @ " + this->layer_param_.name(); + bwdb_top_diff ->name = "bwdb_top_diff @ " + this->layer_param_.name(); + bwdb_bias_diff ->name = "bwdb_bias_diff @ " + this->layer_param_.name(); + + +/* + * Forward setup, implemented by convolutionBwdData + */ + dnnDelete(convolutionBwdData); + status = dnnGroupsConvolutionCreateBackwardData( + &convolutionBwdData, + NULL, + dnnAlgorithmConvolutionDirect, + g, + dimension, + tdata_sizes, + bdata_sizes, + fdata_sizes, + convolutionStrides, + inputOffset, + dnnBorderZeros); + 
CHECK_EQ(status, 0) + << "Failed dnnConvolutionCreateBackwardData with status " + << status << "\n"; + fwd_bottom_data->create_layouts(convolutionBwdData, dnnResourceDiffDst, dimension, + bdata_sizes, bdata_strides); + fwd_top_data ->create_layouts(convolutionBwdData, dnnResourceDiffSrc, dimension, + tdata_sizes, tdata_strides); + fwd_filter_data->create_layouts(convolutionBwdData, dnnResourceFilter, + f_dimension, fdata_sizes, fdata_strides); + +/* + * Backward by Data setup, implemented by convolutionFwd + */ + + dnnDelete(convolutionFwd); + + status = dnnGroupsConvolutionCreateForward( + &convolutionFwd, + NULL, + dnnAlgorithmConvolutionDirect, + g, + dimension, + tdata_sizes, + bdata_sizes, + fdata_sizes, + convolutionStrides, + inputOffset, + dnnBorderZeros); + + CHECK_EQ(status, 0) + << "Failed dnnCreateConvolution(dnnForward) with status " + << status << "\n"; + + bwdd_bottom_diff->create_layouts(convolutionFwd, dnnResourceDst, + dimension, bdata_sizes, bdata_strides); + bwdd_top_diff ->create_layouts(convolutionFwd, dnnResourceSrc, + dimension, tdata_sizes, tdata_strides); + bwdd_filter_data->create_layouts(convolutionFwd, dnnResourceFilter, + f_dimension, fdata_sizes, fdata_strides); + +/* + * Backward by filter layer setup + */ + dnnDelete(convolutionBwdFilter); + status = dnnGroupsConvolutionCreateBackwardFilter( + &convolutionBwdFilter, + NULL, + dnnAlgorithmConvolutionDirect, + g, + dimension, + tdata_sizes, + bdata_sizes, + fdata_sizes, + convolutionStrides, + inputOffset, + dnnBorderZeros); + CHECK_EQ(status, 0) + << "Failed dnnConvolutionCreateBackwardFilter with status " + << status << "\n"; + + bwdf_bottom_data->create_layouts(convolutionBwdFilter, dnnResourceDiffDst, + dimension, bdata_sizes, bdata_strides); + bwdf_top_diff ->create_layouts(convolutionBwdFilter, dnnResourceSrc, + dimension, tdata_sizes, tdata_strides); + bwdf_filter_diff->create_layouts(convolutionBwdData, dnnResourceFilter, + f_dimension, fdata_sizes, fdata_strides); + // 
support for (iter_size > 1) requires additional buffer + bwdf_filter_diff_iter->create_layouts(convolutionFwd, dnnResourceFilter, + f_dimension, fdata_sizes, fdata_strides); + + // Note: this caused some trouble for older MKL + if (getMKLBuildDate() > 20160701) { + // bwdf2fwd_filter_diff: + // layout_int = internal layout of weight diff + // layout_usr = internal layout of weight data on forward convolution + bwdf2fwd_filter_diff->create_internal_layout(convolutionBwdFilter, + dnnResourceDiffFilter); + bwdf2fwd_filter_diff->remove_user_layout(); + status = dnnLayoutCreateFromPrimitive( + &bwdf2fwd_filter_diff->layout_usr, convolutionBwdData, dnnResourceFilter); + CHECK_EQ(status, 0) << "Failed dnnLayoutCreateFromPrimitive with status " + << status << "\n"; + + bwdf2fwd_filter_diff->create_conversions(); + } + +/* + * Backward by bias layer setup + */ + if (this->bias_term_) { + dnnDelete(convolutionBwdBias); + status = dnnGroupsConvolutionCreateBackwardBias( + &convolutionBwdBias, + NULL, + dnnAlgorithmConvolutionDirect, + g, + dimension, + tdata_sizes); + CHECK_EQ(status, 0) + << "Failed dnnConvolutionCreateBackwardBias with status " + << status << "\n"; + + bwdb_top_diff->create_layouts(convolutionBwdBias, dnnResourceDiffDst, + dimension, tdata_sizes, tdata_strides); + bwdb_bias_diff->create_layouts(convolutionBwdBias, dnnResourceDiffBias, + 1, bias_sizes, bias_strides); + // support for (iter_size > 1) requires additional buffer + bwdb_bias_diff_iter->create_layouts(convolutionBwdBias, dnnResourceDiffBias, + 1, bias_sizes, bias_strides); + } + +} + +template +void MKLDeconvolutionLayer::LayerSetUp( + const vector*>& bottom, + const vector*>& top) { + DeconvolutionLayer::LayerSetUp(bottom, top); + + Init(bottom, top); +} + +template +void MKLDeconvolutionLayer::Reshape(const vector*>& bottom, + const vector*>& top) { + bool reinitialize = (this->width_ == bottom[0]->width() && + this->height_ == bottom[0]->height() && + this->channels_ == bottom[0]->channels() 
&& + this->num_ == bottom[0]->num()) ? false : true; + + BaseConvolutionLayer::ReshapeForMKL(bottom, top); + + if (reinitialize == true) { + Init(bottom, top); + } +} + +template +void MKLDeconvolutionLayer::Forward_cpu( + const vector*>& bottom, const vector*>& top) { + int status; + size_t n, g; + size_t iw, ih, ic; + size_t ow, oh, oc; + + g = this->group_; + n = this->num_; + iw = this->width_; + ih = this->height_; + ic = this->channels_/g; + + CHECK(bottom[0]->width() == iw && + bottom[0]->height() == ih && + bottom[0]->channels() == ic*g && + bottom[0]->num() == n) + << "Inclompatible shape of bottom with layer"; + + ow = this->width_out_; + oh = this->height_out_; + oc = this->num_output_/g; + CHECK(top[0]->width() == ow && + top[0]->height() == oh && + top[0]->channels() == oc*g && + top[0]->num() == n) << "Inclompatible shape of bottom with layer"; + + + void *res_convolutionBwdData[dnnResourceNumber]; + + res_convolutionBwdData[dnnResourceDiffDst] = + fwd_bottom_data->get_converted_prv(bottom[0], false); + // Currently this conversion adds padding to weights. 
+ // We don't want that to be stored in the weights prv_ptr_ + res_convolutionBwdData[dnnResourceFilter] = + fwd_filter_data->get_converted_prv(this->blobs_[0].get(), true); + + if (fwd_top_data->conversion_needed()) { + top[0]->set_prv_data_descriptor(fwd_top_data); + res_convolutionBwdData[dnnResourceDiffSrc] = + reinterpret_cast(top[0]->mutable_prv_data()); + } else { + res_convolutionBwdData[dnnResourceDiffSrc] = + top[0]->mutable_cpu_data(); + } + + PERFORMANCE_EVENT_ID_INIT(perf_id_fw_, PERFORMANCE_MKL_NAME("FW")); + PERFORMANCE_MEASUREMENT_BEGIN(); + status = dnnExecute(convolutionBwdData, res_convolutionBwdData); + PERFORMANCE_MEASUREMENT_END_ID(perf_id_fw_); + + CHECK_EQ(status, 0) << "Forward deconvolution failed with status " << status; + + if (this->bias_term_) { + const Dtype* bias = this->blobs_[1]->cpu_data(); + Dtype* top_data = top[0]->mutable_cpu_data(); + +#ifdef _OPENMP +# pragma omp parallel for num_threads(this->num_of_threads_) +#endif + for (int n = 0; n < this->num_; ++n) { + this->forward_cpu_bias(top_data + n * this->top_dim_, bias); + } + } +} + +template +void MKLDeconvolutionLayer::Backward_cpu( + const vector*>& top, const vector& propagate_down, + const vector*>& bottom) { + int status; + size_t n, g; + size_t iw, ih, ic; + size_t ow, oh, oc; + + g = this->group_; + n = this->num_; + iw = this->width_; + ih = this->height_; + ic = this->channels_/g; + + CHECK(bottom[0]->width() == iw && + bottom[0]->height() == ih && + bottom[0]->channels() == ic*g && + bottom[0]->num() == n) + << "Incompatible shape of bottom with layer"; + + ow = this->width_out_; + oh = this->height_out_; + oc = this->num_output_/g; + CHECK(top[0]->width() == ow && + top[0]->height() == oh && + top[0]->channels() == oc*g && + top[0]->num() == n) << "Incompatible shape of top with layer"; + + if (propagate_down[0]) { + + void *res_convolutionFwd[dnnResourceNumber]; + res_convolutionFwd[dnnResourceSrc] = + bwdd_top_diff->get_converted_prv(top[0], true); + // 
Currently this conversion adds padding to weights. + // We don't want that to be stored in the weights prv_ptr_ + res_convolutionFwd[dnnResourceFilter] = + bwdd_filter_data->get_converted_prv(this->blobs_[0].get(), false); + + if (bwdd_bottom_diff->conversion_needed()) { + bottom[0]->set_prv_diff_descriptor(bwdd_bottom_diff); + res_convolutionFwd[dnnResourceDst] = + bottom[0]->mutable_prv_diff(); + } else { + res_convolutionFwd[dnnResourceDst] = + bottom[0]->mutable_cpu_diff(); + } + PERFORMANCE_EVENT_ID_INIT(perf_id_bw_prop_, + PERFORMANCE_MKL_NAME_DETAILED("BW", "_prop")); + PERFORMANCE_MEASUREMENT_BEGIN(); + status = dnnExecute(convolutionFwd, res_convolutionFwd); + PERFORMANCE_MEASUREMENT_END_ID(perf_id_bw_prop_); + + CHECK_EQ(status, 0) << "Backward Data deconv failed with status " << status; + } + + if (this->param_propagate_down(0)) { + void *res_convolutionBwdFilter[dnnResourceNumber]; + res_convolutionBwdFilter[dnnResourceDiffDst] = + bwdf_bottom_data->get_converted_prv(bottom[0], false); + + res_convolutionBwdFilter[dnnResourceSrc] = + bwdf_top_diff->get_converted_prv(top[0], false); + + + if (bwdf_filter_diff->conversion_needed()) { + this->blobs_[0]->set_prv_diff_descriptor(bwdf_filter_diff); + } + if (bwdf2fwd_filter_diff->conversion_needed()) { + // Different layouts in fwd filters vs bwd diffs + res_convolutionBwdFilter[dnnResourceDiffFilter] = + reinterpret_cast(bwdf2fwd_filter_diff->prv_ptr()); + } else { + if (Caffe::iter_size() > 1) { + // if (iter_size > 1) then diffs are accumulated across iterations + res_convolutionBwdFilter[dnnResourceDiffFilter] = + bwdf_filter_diff_iter->prv_ptr(); + } else { + if (bwdf_filter_diff->conversion_needed()) { + res_convolutionBwdFilter[dnnResourceDiffFilter] = + this->blobs_[0]->mutable_prv_diff(); + } else { + res_convolutionBwdFilter[dnnResourceDiffFilter] = + this->blobs_[0]->mutable_cpu_diff(); + } + } + } + PERFORMANCE_EVENT_ID_INIT(perf_id_bw_, PERFORMANCE_MKL_NAME("BW")); + 
PERFORMANCE_MEASUREMENT_BEGIN(); + status = dnnExecute(convolutionBwdFilter, res_convolutionBwdFilter); + PERFORMANCE_MEASUREMENT_END_ID(perf_id_bw_); + + CHECK_EQ(status, 0) << "Backward Filter conv failed with status " << status; + + if (bwdf2fwd_filter_diff->conversion_needed()) { + // Different layouts in fwd filters vs bwd diffs + void *convert_resources[dnnResourceNumber]; + convert_resources[dnnResourceFrom] = bwdf2fwd_filter_diff->prv_ptr(); + + if (Caffe::iter_size() > 1) { + // if (iter_size > 1) then diffs are accumulated across iterations + convert_resources[dnnResourceTo] = + bwdf_filter_diff_iter->prv_ptr(); + if (bwdf_filter_diff->conversion_needed()) + DLOG(INFO) << "convert priv => priv " << bwdf2fwd_filter_diff->name + << " => " << bwdf_filter_diff->name; + else + DLOG(INFO) << "convert priv => " << bwdf2fwd_filter_diff->name + << " =>"; + } else { + if (bwdf_filter_diff->conversion_needed()) { + convert_resources[dnnResourceTo] = + this->blobs_[0]->mutable_prv_diff(); + DLOG(INFO) << "convert priv => priv " << bwdf2fwd_filter_diff->name + << " => " << bwdf_filter_diff->name; + } else { + convert_resources[dnnResourceTo] = + this->blobs_[0]->mutable_cpu_diff(); + DLOG(INFO) << "convert priv => " << bwdf2fwd_filter_diff->name + << " =>"; + } + } + + PERFORMANCE_EVENT_ID_INIT(perf_id_bw_diff_, + PERFORMANCE_MKL_NAME_DETAILED("BW", "_diff")); + PERFORMANCE_MEASUREMENT_BEGIN(); + status = dnnExecute(bwdf2fwd_filter_diff->convert_from_int, + convert_resources); + PERFORMANCE_MEASUREMENT_END_ID(perf_id_bw_diff_); + + CHECK_EQ(status, 0) << "Conversion failed with status " << status; + } + + if (Caffe::iter_size() > 1) { + // if (iter_size > 1) then diffs are accumulated across iterations + if (bwdf_filter_diff->conversion_needed()) { + caffe_axpy((const int)this->blobs_[0]->prv_diff_count(), 1, + reinterpret_cast(bwdf_filter_diff_iter->prv_ptr()), + this->blobs_[0]->mutable_prv_diff()); + } else { + caffe_axpy((const int)this->blobs_[0]->count(), 1, + 
reinterpret_cast(bwdf_filter_diff_iter->prv_ptr()), + this->blobs_[0]->mutable_cpu_diff()); + } + } + } + + if (this->param_propagate_down(1)) { + void *res_convolutionBwdBias[dnnResourceNumber]; + + res_convolutionBwdBias[dnnResourceDiffDst] = + bwdb_top_diff->get_converted_prv(top[0], true); + if (Caffe::iter_size() > 1) { + // if (iter_size > 1) then diffs are accumulated across iterations + res_convolutionBwdBias[dnnResourceDiffBias] = + bwdb_bias_diff_iter->prv_ptr(); + } else { + if (bwdb_bias_diff->conversion_needed()) { + this->blobs_[1]->set_prv_diff_descriptor(bwdb_bias_diff); + res_convolutionBwdBias[dnnResourceDiffBias] = + reinterpret_cast(this->blobs_[1]->mutable_prv_diff()); + + } else { + res_convolutionBwdBias[dnnResourceDiffBias] = + reinterpret_cast(this->blobs_[1]->mutable_cpu_diff()); + } + } + + PERFORMANCE_EVENT_ID_INIT(perf_id_bw_bias_, + PERFORMANCE_MKL_NAME_DETAILED("BW", "_bias")); + PERFORMANCE_MEASUREMENT_BEGIN(); + status = dnnExecute(convolutionBwdBias, res_convolutionBwdBias); + PERFORMANCE_MEASUREMENT_END_ID(perf_id_bw_bias_); + + CHECK_EQ(status, 0) << "Backward Bias failed with status " << status; + + if (Caffe::iter_size() > 1) { + // if (iter_size > 1) then diffs are accumulated across iterations + if (bwdb_bias_diff->conversion_needed()) { + caffe_axpy((const int)this->blobs_[1]->prv_diff_count(), 1, + reinterpret_cast(bwdb_bias_diff_iter->prv_ptr()), + this->blobs_[1]->mutable_prv_diff()); + } else { + caffe_axpy((const int)this->blobs_[1]->count(), 1, + reinterpret_cast(bwdb_bias_diff_iter->prv_ptr()), + this->blobs_[1]->mutable_cpu_diff()); + } + } + } +} + +#ifdef CPU_ONLY +STUB_GPU(MKLDeconvolutionLayer); +#else +template +void MKLDeconvolutionLayer::Forward_gpu( + const vector*>& bottom, const vector*>& top) + {NOT_IMPLEMENTED;} +template +void MKLDeconvolutionLayer::Backward_gpu( + const vector*>& top, const vector& propagate_down, + const vector*>& bottom) + {NOT_IMPLEMENTED;} +#endif + 
+INSTANTIATE_CLASS(MKLDeconvolutionLayer); +} // namespace caffe +#endif // #ifdef MKL2017_SUPPORTED diff --git a/src/caffe/test/test_mkl_deconvolution_layer.cpp b/src/caffe/test/test_mkl_deconvolution_layer.cpp new file mode 100644 index 000000000..9ea1214d7 --- /dev/null +++ b/src/caffe/test/test_mkl_deconvolution_layer.cpp @@ -0,0 +1,419 @@ +/* +All modification made by Intel Corporation: © 2016 Intel Corporation + +All contributions by the University of California: +Copyright (c) 2014, 2015, The Regents of the University of California (Regents) +All rights reserved. + +All other contributions: +Copyright (c) 2014, 2015, the respective contributors +All rights reserved. +For the list of contributors go to https://github.com/BVLC/caffe/blob/master/CONTRIBUTORS.md + + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef MKL2017_SUPPORTED +#include + +#include "gtest/gtest.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/layers/mkl_layers.hpp" +#include "caffe/layers/deconv_layer.hpp" + +#include "caffe/test/test_caffe_main.hpp" +#include "caffe/test/test_gradient_check_util.hpp" + +namespace caffe { + +// Since ConvolutionLayerTest checks the shared conv/deconv code in detail, +// we'll just do a simple forward test and a gradient check. 
+template +class MKLDeconvolutionLayerTest : public MultiDeviceTest { + typedef typename TypeParam::Dtype Dtype; + + protected: + MKLDeconvolutionLayerTest() + : blob_bottom_(new Blob(2, 3, 6, 4)), + ref_blob_bottom_(new Blob(2, 3, 6, 4)), + blob_bottom_2_(new Blob(2, 3, 6, 4)), + blob_top_(new Blob()), + ref_blob_top_(new Blob()), + blob_top_2_(new Blob()) {} + virtual void SetUp() { + // fill the values + FillerParameter filler_param; + filler_param.set_value(1.); + GaussianFiller filler(filler_param); + filler.Fill(this->blob_bottom_); + filler.Fill(this->ref_blob_bottom_); + filler.Fill(this->blob_bottom_2_); + blob_bottom_vec_.push_back(blob_bottom_); + blob_top_vec_.push_back(blob_top_); + ref_blob_bottom_vec_.push_back(ref_blob_bottom_); + ref_blob_top_vec_.push_back(ref_blob_top_); + + } + + virtual ~MKLDeconvolutionLayerTest() { + delete blob_bottom_; + delete ref_blob_bottom_; + delete blob_bottom_2_; + delete blob_top_; + delete ref_blob_top_; + delete blob_top_2_; + } + + Blob* const blob_bottom_; + Blob* const ref_blob_bottom_; + Blob* const blob_bottom_2_; + Blob* const blob_top_; + Blob* const ref_blob_top_; + Blob* const blob_top_2_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; + vector*> ref_blob_bottom_vec_; + vector*> ref_blob_top_vec_; + +}; + +TYPED_TEST_CASE(MKLDeconvolutionLayerTest, TestDtypesAndDevices); + +TYPED_TEST(MKLDeconvolutionLayerTest, TestSetup) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); + convolution_param->set_num_output(4); + this->blob_bottom_vec_.push_back(this->blob_bottom_2_); + this->blob_top_vec_.push_back(this->blob_top_2_); + shared_ptr > layer( + new MKLDeconvolutionLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_->num(), 2); + 
EXPECT_EQ(this->blob_top_->channels(), 4); + EXPECT_EQ(this->blob_top_->height(), 13); + EXPECT_EQ(this->blob_top_->width(), 9); + EXPECT_EQ(this->blob_top_2_->num(), 2); + EXPECT_EQ(this->blob_top_2_->channels(), 4); + EXPECT_EQ(this->blob_top_2_->height(), 13); + EXPECT_EQ(this->blob_top_2_->width(), 9); + // setting group should not change the shape + convolution_param->set_num_output(3); + convolution_param->set_group(3); + layer.reset(new MKLDeconvolutionLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_->num(), 2); + EXPECT_EQ(this->blob_top_->channels(), 3); + EXPECT_EQ(this->blob_top_->height(), 13); + EXPECT_EQ(this->blob_top_->width(), 9); + EXPECT_EQ(this->blob_top_2_->num(), 2); + EXPECT_EQ(this->blob_top_2_->channels(), 3); + EXPECT_EQ(this->blob_top_2_->height(), 13); + EXPECT_EQ(this->blob_top_2_->width(), 9); +} + +TYPED_TEST(MKLDeconvolutionLayerTest, TestSimpleMKLDeconvolution) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->add_kernel_size(3); + convolution_param->add_stride(2); + convolution_param->set_num_output(4); + convolution_param->mutable_weight_filler()->set_type("constant"); + convolution_param->mutable_weight_filler()->set_value(1); + convolution_param->mutable_bias_filler()->set_type("constant"); + convolution_param->mutable_bias_filler()->set_value(0.1); + shared_ptr > layer( + new MKLDeconvolutionLayer(layer_param)); + shared_ptr > ref_layer( + new DeconvolutionLayer(layer_param)); + + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ref_layer->SetUp(this->ref_blob_bottom_vec_, this->ref_blob_top_vec_); + // constant-fill the bottom blobs + FillerParameter filler_param; + filler_param.set_value(1.); + + ConstantFiller filler(filler_param); + filler.Fill(this->blob_bottom_); + filler.Fill(this->ref_blob_bottom_); + + 
layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + ref_layer->Forward(this->ref_blob_bottom_vec_, this->ref_blob_top_vec_); + // simply check that accumulation works with overlapping filters + const Dtype* top_data = this->blob_top_->cpu_data(); + const Dtype* ref_top_data = this->ref_blob_top_->cpu_data(); + for (int n = 0; n < this->blob_top_->num(); ++n) { + for (int c = 0; c < this->blob_top_->channels(); ++c) { + for (int h = 0; h < this->blob_top_->height(); ++h) { + for (int w = 0; w < this->blob_top_->width(); ++w) { + Dtype expected = 3.1; + bool h_overlap = h % 2 == 0 && h > 0 + && h < this->blob_top_->height() - 1; + bool w_overlap = w % 2 == 0 && w > 0 + && w < this->blob_top_->width() - 1; + if (h_overlap && w_overlap) { + expected += 9; + } else if (h_overlap || w_overlap) { + expected += 3; + } + + EXPECT_NEAR(top_data[this->blob_top_->offset(n, c, h, w)], + expected, 1e-4); + EXPECT_NEAR(ref_top_data[this->blob_top_->offset(n, c, h, w)], + expected, 1e-4); + } + } + } + } + + // set top_diff + Dtype* top_diff = this->blob_top_->mutable_cpu_diff(); + Dtype* ref_top_diff = this->ref_blob_top_->mutable_cpu_diff(); + for( int n = 0; n < this->blob_top_->num(); ++n) { + for( int c = 0; c < this->blob_top_->channels(); ++c) { + for( int h=0; h < this->blob_top_->height(); ++h) { + for(int w = 0; w < this->blob_top_->width(); ++w) { + top_diff[this->blob_top_->offset(n, c, h, w)] = ref_top_data[this->blob_top_->offset(n, c, h, w)]; + ref_top_diff[this->blob_top_->offset(n, c, h, w)] = ref_top_data[this->blob_top_->offset(n, c, h, w)]; + } + } + } + } + + + vector need_backward({true}); + layer->Backward(this->blob_top_vec_, need_backward, this->blob_bottom_vec_); + ref_layer->Backward(this->ref_blob_top_vec_, need_backward, this->ref_blob_bottom_vec_); + + // check backward data + const Dtype* bottom_diff = this->blob_bottom_->cpu_diff(); + const Dtype* ref_bottom_diff = this->ref_blob_bottom_->cpu_diff(); + for( int n = 0; n < 
this->blob_bottom_->num(); ++n) { + for( int c = 0; c < this->blob_bottom_->channels(); ++c) { + for( int h=0; h < this->blob_bottom_->height(); ++h) { + for(int w = 0; w < this->blob_bottom_->width(); ++w) { + EXPECT_NEAR(bottom_diff[this->blob_bottom_->offset(n, c, h, w)], + ref_bottom_diff[this->blob_bottom_->offset(n, c, h, w)], + 1e-4); + } + } + } + } + // check backward weights + for (int i = 0; i < layer->blobs().size(); ++i) { + Blob* blob = layer->blobs()[i].get(); + Blob* ref_blob = ref_layer->blobs()[i].get(); + const Dtype* weights_diff = blob->cpu_diff(); + const Dtype* ref_weights_diff = ref_blob->cpu_diff(); + for( int n = 0; n < blob->num(); ++n) { + for( int c = 0; c channels(); ++c) { + for( int h = 0; h < blob->height(); ++h) { + for( int w =0; w < blob->width(); ++w) { + //printf("%.4f ", weights_diff[blob->offset(n, c, h, w)] - ref_weights_diff[ref_blob->offset(n, c, h, w)]); + EXPECT_NEAR(weights_diff[blob->offset(n, c, h, w)], + ref_weights_diff[ref_blob->offset(n, c, h, w)], + 1e-3); + } + } + } + } + } +} +TYPED_TEST(MKLDeconvolutionLayerTest, TestGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + this->blob_bottom_vec_.push_back(this->blob_bottom_2_); + this->blob_top_vec_.push_back(this->blob_top_2_); + convolution_param->add_kernel_size(2); + convolution_param->add_stride(1); + convolution_param->set_num_output(1); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("gaussian"); + MKLDeconvolutionLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} +TYPED_TEST(MKLDeconvolutionLayerTest, TestNDAgainst2D) { + typedef typename TypeParam::Dtype Dtype; + const int kernel_h = 11; + const int kernel_w = 13; + vector bottom_shape(4); + bottom_shape[0] = 
15; + bottom_shape[1] = 12; + bottom_shape[2] = kernel_h * 2; + bottom_shape[3] = kernel_w * 2; + FillerParameter filler_param; + GaussianFiller filler(filler_param); + for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { + this->blob_bottom_vec_[i]->Reshape(bottom_shape); + filler.Fill(this->blob_bottom_vec_[i]); + } + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->set_num_output(18); + convolution_param->set_bias_term(false); + convolution_param->set_group(6); + convolution_param->set_kernel_h(kernel_h); + convolution_param->set_kernel_w(kernel_w); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + Blob weights; + Blob top_diff; + // Shape and fill weights and top_diff. + bool copy_diff; + bool reshape; + { + MKLDeconvolutionLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + top_diff.ReshapeLike(*this->blob_top_); + filler.Fill(&top_diff); + ASSERT_EQ(1, layer.blobs().size()); + copy_diff = false; reshape = true; + weights.CopyFrom(*layer.blobs()[0], copy_diff, reshape); + } + vector propagate_down(1, true); + Blob result_2d; + Blob backward_result_2d; + Blob backward_weight_result_2d; + // Test with 2D im2col + { + caffe_set(this->blob_top_->count(), Dtype(0), + this->blob_top_->mutable_cpu_data()); + caffe_set(this->blob_bottom_->count(), Dtype(0), + this->blob_bottom_->mutable_cpu_diff()); + caffe_set(weights.count(), Dtype(0), weights.mutable_cpu_diff()); + // Do SetUp and Forward; save Forward result in result_2d. 
+ convolution_param->set_force_nd_im2col(false); + MKLDeconvolutionLayer layer_2d(layer_param); + layer_2d.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(1, layer_2d.blobs().size()); + copy_diff = false; reshape = false; + layer_2d.blobs()[0]->CopyFrom(weights, copy_diff, reshape); + layer_2d.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + copy_diff = false; reshape = true; + result_2d.CopyFrom(*this->blob_top_, copy_diff, reshape); + // Copy pre-generated top diff into actual top diff; + // do Backward and save result in backward_result_2d. + ASSERT_EQ(this->blob_top_->shape(), top_diff.shape()); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_top_->mutable_cpu_diff()); + layer_2d.Backward(this->blob_top_vec_, propagate_down, + this->blob_bottom_vec_); + copy_diff = true; reshape = true; + backward_result_2d.CopyFrom(*this->blob_bottom_, copy_diff, reshape); + backward_weight_result_2d.CopyFrom(weights, copy_diff, reshape); + } + Blob result_nd; + Blob backward_result_nd; + Blob backward_weight_result_nd; + // Test with ND im2col + { + caffe_set(this->blob_top_->count(), Dtype(0), + this->blob_top_->mutable_cpu_data()); + caffe_set(this->blob_bottom_->count(), Dtype(0), + this->blob_bottom_->mutable_cpu_diff()); + caffe_set(weights.count(), Dtype(0), weights.mutable_cpu_diff()); + // Do SetUp and Forward; save Forward result in result_nd. + convolution_param->set_force_nd_im2col(true); + MKLDeconvolutionLayer layer_nd(layer_param); + layer_nd.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(1, layer_nd.blobs().size()); + copy_diff = false; reshape = false; + layer_nd.blobs()[0]->CopyFrom(weights, copy_diff, reshape); + layer_nd.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + copy_diff = false; reshape = true; + result_nd.CopyFrom(*this->blob_top_, copy_diff, reshape); + // Copy pre-generated top diff into actual top diff; + // do Backward and save result in backward_result_nd. 
+ ASSERT_EQ(this->blob_top_->shape(), top_diff.shape()); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_top_->mutable_cpu_diff()); + layer_nd.Backward(this->blob_top_vec_, propagate_down, + this->blob_bottom_vec_); + copy_diff = true; reshape = true; + backward_result_nd.CopyFrom(*this->blob_bottom_, copy_diff, reshape); + backward_weight_result_nd.CopyFrom(weights, copy_diff, reshape); + } + ASSERT_EQ(result_nd.count(), result_2d.count()); + for (int i = 0; i < result_2d.count(); ++i) { + EXPECT_EQ(result_2d.cpu_data()[i], result_nd.cpu_data()[i]); + } + ASSERT_EQ(backward_result_nd.count(), backward_result_2d.count()); + for (int i = 0; i < backward_result_2d.count(); ++i) { + EXPECT_EQ(backward_result_2d.cpu_diff()[i], + backward_result_nd.cpu_diff()[i]); + } + ASSERT_EQ(backward_weight_result_nd.count(), + backward_weight_result_2d.count()); + for (int i = 0; i < backward_weight_result_2d.count(); ++i) { + EXPECT_EQ(backward_weight_result_2d.cpu_diff()[i], + backward_weight_result_nd.cpu_diff()[i]); + } +} + +#if 0 +TYPED_TEST(MKLDeconvolutionLayerTest, TestGradient3D) { + typedef typename TypeParam::Dtype Dtype; + vector bottom_shape(5); + bottom_shape[0] = this->blob_bottom_vec_[0]->shape(0); + bottom_shape[1] = this->blob_bottom_vec_[0]->shape(1); + bottom_shape[2] = 2; + bottom_shape[3] = 3; + bottom_shape[4] = 2; + FillerParameter filler_param; + GaussianFiller filler(filler_param); + for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) { + this->blob_bottom_vec_[i]->Reshape(bottom_shape); + filler.Fill(this->blob_bottom_vec_[i]); + } + LayerParameter layer_param; + ConvolutionParameter* convolution_param = + layer_param.mutable_convolution_param(); + convolution_param->add_kernel_size(2); + convolution_param->add_stride(2); + convolution_param->add_pad(1); + convolution_param->set_num_output(2); + convolution_param->mutable_weight_filler()->set_type("gaussian"); + convolution_param->mutable_bias_filler()->set_type("gaussian"); + 
MKLDeconvolutionLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} +#endif +} // namespace caffe + +#endif From fde50b138bce197fca5e586ddb416ff4b953ea6a Mon Sep 17 00:00:00 2001 From: "Jin, Ge" Date: Fri, 23 Jun 2017 20:49:13 -0400 Subject: [PATCH 29/34] Add RemoveBNScale feature Signed-off-by: Jin, Ge --- include/caffe/net.hpp | 9 +- include/caffe/util/remove_batch_norm.hpp | 73 ++++++++ src/caffe/net.cpp | 149 ++++++++++++++++- src/caffe/proto/caffe.proto | 14 ++ src/caffe/test/test_net.cpp | 15 +- src/caffe/util/remove_batch_norm.cpp | 203 +++++++++++++++++++++++ 6 files changed, 456 insertions(+), 7 deletions(-) create mode 100644 include/caffe/util/remove_batch_norm.hpp create mode 100644 src/caffe/util/remove_batch_norm.cpp diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp index 14519d38a..0dc63436c 100644 --- a/include/caffe/net.hpp +++ b/include/caffe/net.hpp @@ -300,6 +300,11 @@ class Net { NetParameter* param_compiled); + /** + * @brief If find "Conv--BN--Scale" in current network, merge BN and Scale layer into Convolution + * layers, this optimization only works in caffe TEST phase now. + */ + static void RemoveBNScale(const NetParameter& param, NetParameter* param_compiled); static void GetBlobConsumers(std::vector &cnsmer_blobs, const string& blob_name_to_find, @@ -330,7 +335,9 @@ class Net { void BackwardDebugInfo(const int layer_id); /// @brief Helper for displaying debug info in Update. 
void UpdateDebugInfo(const int param_id); - + bool bn_scale_remove_; + bool bn_scale_merge_; + vector kept_bn_layers_; /// @brief The network name string name_; /// @brief The engine name diff --git a/include/caffe/util/remove_batch_norm.hpp b/include/caffe/util/remove_batch_norm.hpp new file mode 100644 index 000000000..c2e92f40f --- /dev/null +++ b/include/caffe/util/remove_batch_norm.hpp @@ -0,0 +1,73 @@ + +/* +All modification made by Intel Corporation: © 2016 Intel Corporation + +All contributions by the University of California: +Copyright (c) 2014, 2015, The Regents of the University of California (Regents) +All rights reserved. + +All other contributions: +Copyright (c) 2014, 2015, the respective contributors +All rights reserved. +For the list of contributors go to https://github.com/BVLC/caffe/blob/master/CONTRIBUTORS.md + + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +#ifndef COMPILE_NET_UTIL_HPP_ +#define COMPILE_NET_UTIL_HPP_ +#include "caffe/proto/caffe.pb.h" + +namespace caffe { +/** + * @brief If CompileNet's compilation rule one does work, some scale layer's weights and bias blobs + * may be merged into batch norm layer. RecoverScaleFromBN will recover the merged scale layer's info. + * Currently, we only care about the weights and bias info. + */ +template +void RecoverScaleFromBN(const LayerParameter& bn_layer_param, LayerParameter& scale_layer_param, Dtype default_scale_weights, Dtype default_scale_bias); +/** + * @brief rename layer1's top to layer2's + */ +void MergeLayer(LayerParameter &layer1, const LayerParameter &layer2); + +/** + * @brief After removing the batch norm and scale layer after a convolution layer, to make the inference + * result correct, we must adjust convolution layer's weights and bias blobs + */ + +template +void AdjustConvLayer(LayerParameter &conv_layer, + const LayerParameter &batch_norm_layer, + const LayerParameter &scale_layer, bool is_net_init); + +/** + * @brief The batch norm and scale layer may be merged due to compilation rule one's effect, RecoverBNScaleMergedNet + * is used to recover the scale layer + */ +template +void RecoverBNScaleMergedNet(NetParameter * net_param, NetParameter* recovered_net_param); + +} +#endif diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index dfcc941da..10d39fc54 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -61,6 +61,7 @@ 
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "caffe/test/test_caffe_main.hpp" #include "caffe/multinode/mlsl.hpp" #include "caffe/multinode/apply_mn_param.hpp" +#include "caffe/util/remove_batch_norm.hpp" PERFORMANCE_CREATE_MONITOR(); @@ -138,6 +139,15 @@ void Net::Init(const NetParameter& in_param) { NetParameter compiled_param; // Transform Net (merge layers etc.) improve computational performance CompileNet(param, &compiled_param); + + + + this->bn_scale_remove_ = param.compile_net_state().bn_scale_remove(); + this->bn_scale_merge_ = param.compile_net_state().bn_scale_merge(); + int kept_bn_layers_num = param.compile_net_state().kept_bn_layers_size(); + for (int idx = 0; idx < kept_bn_layers_num; ++idx) { + this->kept_bn_layers_.push_back(param.compile_net_state().kept_bn_layers(idx)); + } param = compiled_param; #ifdef USE_MLSL @@ -480,10 +490,18 @@ void Net::FilterNet(const NetParameter& param, template void Net::CompileNet(const NetParameter& param, NetParameter* param_compiled) { + + + + NetParameter param_temp0; + param_temp0.CopyFrom(param); + param_temp0.clear_layer(); + RemoveBNScale(param, ¶m_temp0); + NetParameter param_temp; // temporary compiled param - param_temp.CopyFrom(param); + param_temp.CopyFrom(param_temp0); param_temp.clear_layer(); // Remove layers - CompilationRuleOne(param, ¶m_temp); + CompilationRuleOne(param_temp0, ¶m_temp); NetParameter param_temp2; // temporary compiled param param_temp2.CopyFrom(param_temp); @@ -499,6 +517,8 @@ void Net::CompileNet(const NetParameter& param, template void Net::CompilationRuleOne(const NetParameter& param, NetParameter* param_compiled) { + + bool merge_bn_scale = false; std::set layers_to_drop; for (int i = 0; i < param.layer_size(); ++i) { LayerParameter* layer_param = @@ -548,6 +568,7 @@ void Net::CompilationRuleOne(const NetParameter& param, const string& scale_top_blob_name = consumer_layer_param.top(0); // Mark Consumer layer (its name) as the one marked for 
dropping layers_to_drop.insert(consumer_layer_param.name()); + if (!merge_bn_scale) merge_bn_scale = true; // Replace BatchNorm top name with Scale top name batchnorm_top_blob_name.resize(scale_top_blob_name.size()); @@ -579,6 +600,7 @@ void Net::CompilationRuleOne(const NetParameter& param, param_compiled->add_layer()->CopyFrom(*layer_param); } } + param_compiled->mutable_compile_net_state()->set_bn_scale_merge(merge_bn_scale); } @@ -744,6 +766,102 @@ void Net::CompilationRuleThree(const NetParameter& param, return; } + +template +void Net::RemoveBNScale(const NetParameter& param, + NetParameter* param_compiled) { + // - In TEST Phase, if we detect sequential layers conv->batch norm ->scale, + // We will merge batch norm and scale layer into conv layer. + if(param.state().phase() != TEST) { + param_compiled->CopyFrom(param); + param_compiled->mutable_compile_net_state()->set_bn_scale_remove(false); + return ; + } + + bool bn_scale_remove = false; + bool is_net_init = param.compile_net_state().is_init(); + std::set layers_to_drop; + for (int i = 0; i < param.layer_size(); ++i) { + LayerParameter *layer_param = (const_cast(param)).mutable_layer(i); + bool layer_included = true; + bool bn_use_global_stats_set = true; + if (layer_param->type().compare("Convolution") == 0) { + std::vector child_layers_params; + GetBlobConsumers(child_layers_params, layer_param->top(0), param, i + 1 < param.layer_size() ? i + 1 : i); + const LayerParameter &child_layer_param = child_layers_params.size() > 0 ? *(child_layers_params[0]) : *layer_param; + // check whether child layer is BatchNorm + if (child_layer_param.type().compare("BatchNorm") == 0) { + BatchNormParameter bn_param = child_layer_param.batch_norm_param(); + if (is_net_init) { + //Testing Network init process + bool bn_use_global_stats = true; + if (bn_param.has_use_global_stats()) { + bn_use_global_stats = bn_param.use_global_stats(); + } + if (!bn_use_global_stats) { + //This bn layer's use_global_stats is set manually! 
Don't remove it. + //remained_bn_layer_names.push_back(child_layer_param.name()); + param_compiled->mutable_compile_net_state()->add_kept_bn_layers(child_layer_param.name()); + bn_use_global_stats_set = false; + } + } else { + int kept_bn_layers_num = param.compile_net_state().kept_bn_layers_size(); + bool in_kept_list = false; + for (int idx = 0; idx < kept_bn_layers_num; ++idx) { + if (child_layer_param.name().compare(param.compile_net_state().kept_bn_layers(idx)) == 0) { + in_kept_list = true; + break; + } + } + if (in_kept_list) { + bn_use_global_stats_set = false; + } + } + + if (!bn_use_global_stats_set) { + //Even in caffe TEST phase, current batch norm layer has set use_global_stats = false in protxt file, so we won't + //merge this layer into convolution layer. + param_compiled->add_layer()->CopyFrom(*layer_param); + continue; + } + std::vector grandchild_layers_params; + GetBlobConsumers(grandchild_layers_params, child_layer_param.top(0), param, i + 2 < param.layer_size() ? i + 2 : i); + const LayerParameter &grandchild_layer_param = (grandchild_layers_params.size() > 0) ? *(grandchild_layers_params[0]) : child_layer_param; + if (grandchild_layer_param.type().compare("Scale") == 0) { + MergeLayer(*layer_param, grandchild_layer_param); + AdjustConvLayer(*layer_param, child_layer_param, grandchild_layer_param, is_net_init); + if (bn_scale_remove == false) bn_scale_remove = true; + layers_to_drop.insert(child_layer_param.name()); + layers_to_drop.insert(grandchild_layer_param.name()); + } else if (&child_layer_param != &grandchild_layer_param) { + //In fact, conv-->batchnorm can also be optimized. 
In such case, we check the blob size of batch norm layer + //if is 3, it means current net hasn't used scale layer, this is equivalent to scale layer with all 1 weights and 0 bias + //if is 4 or 5, it means intel caffe compilation rule 1 works here, we can recover the scale layer from batch norm layer + MergeLayer(*layer_param, child_layer_param); + if (!is_net_init) { + shared_ptr scale_layer_param(new LayerParameter()); + RecoverScaleFromBN(child_layer_param, *scale_layer_param, (Dtype)1, (Dtype)0); + AdjustConvLayer(*layer_param, child_layer_param, *scale_layer_param, is_net_init); + } + if (bn_scale_remove == false) bn_scale_remove = true; + layers_to_drop.insert(child_layer_param.name()); + } + } + } + if (layers_to_drop.find(layer_param->name()) != layers_to_drop.end()) { + LOG_IF(INFO, Caffe::root_solver()) << "Dropped Layer: "<< layer_param->name() << std::endl; + layer_included = false; + // Remove dropped layer from the list of layers to be dropped + layers_to_drop.erase(layers_to_drop.find(layer_param->name())); + } + if (layer_included) { + param_compiled->add_layer()->CopyFrom(*layer_param); + } + } + + param_compiled->mutable_compile_net_state()->set_bn_scale_remove(bn_scale_remove); + } + template void Net::GetBlobConsumers( std::vector& consumer_blobs, @@ -1145,6 +1263,28 @@ void Net::UpdateDebugInfo(const int param_id) { template void Net::ShareTrainedLayersWith(const Net* other) { + + + if (this->bn_scale_remove_) { + //This path shows testing network's blobs(weight & bias) has been adjusted + //We can't share weights & blobs with training net! 
We will save current + //training net to a temp model file and load to memory later + NetParameter temp_net_param; + NetParameter complete_net_param; + other->ToProto(&temp_net_param, false); + //Copy this->remained_bn_layer_names to temp_net_param + for (vector::iterator it = kept_bn_layers_.begin(); it != kept_bn_layers_.end(); it++) { + temp_net_param.mutable_compile_net_state()->add_kept_bn_layers(*it); + } + //temp_net_param.mutable_compile_net_state()->set_bn_top_rename(other->bn_top_rename_); + complete_net_param.CopyFrom(temp_net_param); + complete_net_param.clear_layer(); + if (other->bn_scale_merge_) { + RecoverBNScaleMergedNet(&temp_net_param, &complete_net_param); + } + CopyTrainedLayersFrom(complete_net_param); + return ; + } int num_source_layers = other->layers().size(); for (int i = 0; i < num_source_layers; ++i) { Layer* source_layer = other->layers()[i].get(); @@ -1216,6 +1356,11 @@ void Net::CopyTrainedLayersFrom(const NetParameter& param_inp) { NetParameter param_tmp = param_inp; NetParameter ¶m = param_tmp; param.set_engine(engine_name_); + param_tmp.mutable_state()->set_phase(phase_); + param_tmp.mutable_compile_net_state()->set_is_init(false); + for (vector::iterator it = this->kept_bn_layers_.begin(); it != this->kept_bn_layers_.end(); it++) { + param_tmp.mutable_compile_net_state()->add_kept_bn_layers(*it); + } NetParameter param_compiled; CompileNet(param, ¶m_compiled); param = param_compiled; diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index ab900378f..c4c5228e5 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -197,6 +197,11 @@ message NetParameter { // specified in the layers' include and exclude fields. optional NetState state = 6; + //The CompileNet will do some layer-fusion optimization to current network if it + //finds something can be optimized, compile_net_state records which Compilation Rule + //really works. 
+ optional CompileNetState compile_net_state = 10; + // Print debugging information about results while running Net::Forward, // Net::Backward, and Net::Update. optional bool debug_info = 7 [default = false]; @@ -214,6 +219,15 @@ message NetParameter { optional MultinodeParameter multinode = 101; } + + +message CompileNetState { + optional bool is_init = 1 [default = true]; + optional bool bn_scale_remove = 2 [default = false]; + optional bool bn_scale_merge = 3 [default = false]; + repeated string kept_bn_layers = 4; +} + message MultinodeParameter { repeated MnModelParallelParameter model_parallel = 1; } diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp index 7c6b12ddd..ae4d3f03f 100644 --- a/src/caffe/test/test_net.cpp +++ b/src/caffe/test/test_net.cpp @@ -2958,6 +2958,7 @@ TYPED_TEST(NetTest, TestAllInOneNetDeploy) { ASSERT_TRUE(found_data); } + class CompileNetTest : public ::testing::Test { protected: void RunCompilerNetTest( @@ -2970,13 +2971,19 @@ class CompileNetTest : public ::testing::Test { compiled_param_string, &expected_compiled_param)); NetParameter actual_compiled_param; Net::CompileNet(input_param, &actual_compiled_param); - EXPECT_EQ(expected_compiled_param.DebugString(), - actual_compiled_param.DebugString()); + actual_compiled_param.mutable_compile_net_state()->Clear(); + expected_compiled_param.mutable_compile_net_state()->Clear(); + string expect_net_string = expected_compiled_param.DebugString(); + string actual_net_string = actual_compiled_param.DebugString(); + EXPECT_EQ(expect_net_string, + actual_net_string); // Also test idempotence. 
NetParameter double_compiled_param; Net::CompileNet(actual_compiled_param, &double_compiled_param); - EXPECT_EQ(actual_compiled_param.DebugString(), - double_compiled_param.DebugString()); + double_compiled_param.mutable_compile_net_state()->Clear(); + string double_net_string = double_compiled_param.DebugString(); + EXPECT_EQ(actual_net_string, + double_net_string); } }; diff --git a/src/caffe/util/remove_batch_norm.cpp b/src/caffe/util/remove_batch_norm.cpp new file mode 100644 index 000000000..63c9b3f81 --- /dev/null +++ b/src/caffe/util/remove_batch_norm.cpp @@ -0,0 +1,203 @@ +/* +All modification made by Intel Corporation: © 2016 Intel Corporation + +All contributions by the University of California: +Copyright (c) 2014, 2015, The Regents of the University of California (Regents) +All rights reserved. + +All other contributions: +Copyright (c) 2014, 2015, the respective contributors +All rights reserved. +For the list of contributors go to https://github.com/BVLC/caffe/blob/master/CONTRIBUTORS.md + + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include +#include +#include "caffe/blob.hpp" +#include "caffe/util/remove_batch_norm.hpp" +#include "caffe/util/math_functions.hpp" +namespace caffe { + +template +void RecoverScaleFromBN(const LayerParameter& bn_layer_param, LayerParameter& scale_layer_param, Dtype default_scale_weights, Dtype default_scale_bias) { + CHECK(bn_layer_param.blobs_size() >= 3) << "BatchNorm Layer's blob size must be 3 at least!" << std::endl; + CHECK(bn_layer_param.type().compare("BatchNorm") == 0) << "Scale layer can only be recovered from batch norm layer!" 
<< std::endl; + scale_layer_param.set_name("scale_" + bn_layer_param.name()); + scale_layer_param.set_type("Scale"); + scale_layer_param.set_phase(TEST); + //Assume the scale layer commonly use in-place top/bottom + scale_layer_param.add_top(const_cast<string&>(bn_layer_param.top(0))); + scale_layer_param.add_bottom(const_cast<string&>(bn_layer_param.top(0))); + int bn_layer_blob_size = bn_layer_param.blobs_size(); + //Pre-assumption: scale layer weight and bias blob have same shape + if (bn_layer_blob_size == 5) { + scale_layer_param.add_blobs()->CopyFrom(bn_layer_param.blobs(3)); + scale_layer_param.add_blobs()->CopyFrom(bn_layer_param.blobs(4)); + } else if (bn_layer_blob_size == 4) { + scale_layer_param.add_blobs()->CopyFrom(bn_layer_param.blobs(3)); + Blob<Dtype> scale_bias_blob, scale_weight_blob; + scale_weight_blob.FromProto(scale_layer_param.blobs(0)); + scale_bias_blob.ReshapeLike(scale_weight_blob); + caffe_set(scale_bias_blob.count(), default_scale_bias, scale_bias_blob.mutable_cpu_data()); + BlobProto scale_bias_blob_proto; + scale_bias_blob.ToProto(&scale_bias_blob_proto, false); + scale_layer_param.add_blobs()->CopyFrom(scale_bias_blob_proto); + } else { + Blob<Dtype> scale_weight_blob, scale_bias_blob, bn_mean_blob; + BlobProto scale_weight_blob_proto, scale_bias_blob_proto; + bn_mean_blob.FromProto(bn_layer_param.blobs(0)); + vector<int> scale_shape_vec; + scale_shape_vec.resize(1); + scale_shape_vec[0] = bn_mean_blob.shape(0); + scale_weight_blob.Reshape(scale_shape_vec); + scale_bias_blob.Reshape(scale_shape_vec); + caffe_set(scale_weight_blob.count(), default_scale_weights, scale_weight_blob.mutable_cpu_data()); + caffe_set(scale_bias_blob.count(), default_scale_bias, scale_bias_blob.mutable_cpu_data()); + scale_weight_blob.ToProto(&scale_weight_blob_proto, false); + scale_bias_blob.ToProto(&scale_bias_blob_proto, false); + scale_layer_param.add_blobs()->CopyFrom(scale_weight_blob_proto); + scale_layer_param.add_blobs()->CopyFrom(scale_bias_blob_proto); + } +} +void
MergeLayer(LayerParameter &layer1, + const LayerParameter &layer2) +{ + string &layer1_top_blob_name = const_cast<string&>(layer1.top(0)); + const string &layer2_top_blob_name = layer2.top(0); + + // Replace Conv top name with Scale top name + layer1_top_blob_name.resize(layer2_top_blob_name.size()); + layer1_top_blob_name.replace(0, layer2_top_blob_name.size(), layer2_top_blob_name); + return; +} + + +template <typename Dtype> +void AdjustConvLayer(LayerParameter &conv_layer, + const LayerParameter &batch_norm_layer, + const LayerParameter &scale_layer, bool is_net_init) { + if (is_net_init) { + if (!conv_layer.convolution_param().bias_term()) { + //We will merge batch norm and scale layer to conv layer, if conv layer doesn't use bias, adjust it! + conv_layer.mutable_convolution_param()->set_bias_term(true); + } + } else { + Blob<Dtype> conv_weight_blob, conv_bias_blob; + Blob<Dtype> scale_weight_blob, scale_bias_blob; + Blob<Dtype> bn_mean_blob, bn_variance_blob, bn_scale_factor_blob; + Dtype bn_scale_factor; + Dtype bn_eps = batch_norm_layer.batch_norm_param().eps(); + + conv_weight_blob.FromProto(conv_layer.blobs(0), true); + if (!conv_layer.convolution_param().bias_term()) { + conv_layer.mutable_convolution_param()->set_bias_term(true); + vector<int> conv_bias_shape_vec; + conv_bias_shape_vec.resize(1); + conv_bias_shape_vec[0] = conv_weight_blob.shape(0); + conv_bias_blob.Reshape(conv_bias_shape_vec); + caffe_set(conv_bias_blob.count(), (Dtype)0, conv_bias_blob.mutable_cpu_data()); + BlobProto conv_bias_blob_proto; + conv_bias_blob.ToProto(&conv_bias_blob_proto, false); + conv_layer.add_blobs()->CopyFrom(conv_bias_blob_proto); + } else { + conv_bias_blob.FromProto(conv_layer.blobs(1), true); + } + + //We assume scale layer uses weight & bias, but is bias necessary? Need confirm!
+ scale_weight_blob.FromProto(scale_layer.blobs(0), true); + scale_bias_blob.FromProto(scale_layer.blobs(1), true); + bn_mean_blob.FromProto(batch_norm_layer.blobs(0), true); + bn_variance_blob.FromProto(batch_norm_layer.blobs(1), true); + bn_scale_factor_blob.FromProto(batch_norm_layer.blobs(2), true); + bn_scale_factor = bn_scale_factor_blob.cpu_data()[0] == 0 ? 1 : (1 / bn_scale_factor_blob.cpu_data()[0]); + CHECK_EQ(bn_variance_blob.shape(0), scale_weight_blob.shape(0)); + CHECK_EQ(conv_weight_blob.shape(0), scale_weight_blob.shape(0)); + CHECK_EQ(scale_weight_blob.count(), bn_variance_blob.count()); + int alpha_count = scale_weight_blob.count(); + Dtype alpha, scale_weight_val, bn_variance_val; + Dtype * conv_weight_buf = conv_weight_blob.mutable_cpu_data(); + Dtype * conv_bias_buf = conv_bias_blob.mutable_cpu_data(); + const Dtype * scale_bias_buf = scale_bias_blob.cpu_data(); + const Dtype * bn_mean_buf = bn_mean_blob.cpu_data(); + int weight_count = conv_weight_blob.count() / conv_weight_blob.shape(0); + for (int i = 0; i < alpha_count; i++) { + scale_weight_val = scale_weight_blob.cpu_data()[i]; + bn_variance_val = bn_variance_blob.cpu_data()[i]; + alpha = scale_weight_val / (std::sqrt(bn_variance_val * bn_scale_factor + bn_eps)); + conv_bias_buf[i] = conv_bias_buf[i] * alpha + (scale_bias_buf[i] -(bn_mean_buf[i] * bn_scale_factor * alpha)); + Dtype * weight_area = conv_weight_buf + i * weight_count; + caffe_scal(weight_count, alpha, weight_area); + + } + BlobProto *updated_weight_blob_proto = conv_layer.mutable_blobs(0); + BlobProto *updated_bias_blob_proto = conv_layer.mutable_blobs(1); + conv_weight_blob.ToProto(updated_weight_blob_proto); + conv_bias_blob.ToProto(updated_bias_blob_proto); + } + +} + + +template <typename Dtype> +void RecoverBNScaleMergedNet(NetParameter * net_param, NetParameter* recovered_net_param) { + CHECK(net_param != NULL && recovered_net_param != NULL) << "Can NOT recover a NULL network!"
<< std::endl; + int kept_bn_layers_num = net_param->compile_net_state().kept_bn_layers_size(); + int idx; + bool in_kept_list = false; + for (int i = 0; i < net_param->layer_size(); ++i) { + const LayerParameter layer_param = net_param->layer(i); + recovered_net_param->add_layer()->CopyFrom(layer_param); + + if (layer_param.type().compare("BatchNorm") == 0 && layer_param.blobs_size() >= 3) { + for (idx = 0; idx < kept_bn_layers_num; ++idx) { + if (layer_param.name().compare(net_param->compile_net_state().kept_bn_layers(idx)) == 0) { + in_kept_list = true; + break; + } + } + + if (in_kept_list) continue; + shared_ptr<LayerParameter> scale_layer_param(new LayerParameter()); + RecoverScaleFromBN(layer_param, *scale_layer_param, (Dtype)1, (Dtype)0); + recovered_net_param->add_layer()->CopyFrom(*scale_layer_param); + } + } +} + +template void RecoverScaleFromBN<float>(const LayerParameter& bn_layer_param, LayerParameter& scale_layer_param, float default_scale_weights, float default_scale_bias); +template void RecoverScaleFromBN<double>(const LayerParameter& bn_layer_param, LayerParameter& scale_layer_param, double default_scale_weights, double default_scale_bias); +template void AdjustConvLayer<float>(LayerParameter &conv_layer, + const LayerParameter &batch_norm_layer, + const LayerParameter &scale_layer, bool is_net_init); + +template void AdjustConvLayer<double>(LayerParameter &conv_layer, + const LayerParameter &batch_norm_layer, + const LayerParameter &scale_layer, bool is_net_init); + +template void RecoverBNScaleMergedNet<float>(NetParameter * net_param, NetParameter* recovered_net_param); +template void RecoverBNScaleMergedNet<double>(NetParameter * net_param, NetParameter* recovered_net_param); +} From 7a01fb1466011f5b407eb16b85daae58ca543b76 Mon Sep 17 00:00:00 2001 From: "Jin, Ge" Date: Tue, 27 Jun 2017 14:34:08 -0400 Subject: [PATCH 30/34] Fix bug in RemoveBNScale feature Signed-off-by: Jin, Ge --- src/caffe/net.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/caffe/net.cpp
b/src/caffe/net.cpp index 10d39fc54..184c99eb8 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -139,16 +139,13 @@ void Net::Init(const NetParameter& in_param) { NetParameter compiled_param; // Transform Net (merge layers etc.) improve computational performance CompileNet(param, &compiled_param); - - - + param = compiled_param; this->bn_scale_remove_ = param.compile_net_state().bn_scale_remove(); this->bn_scale_merge_ = param.compile_net_state().bn_scale_merge(); int kept_bn_layers_num = param.compile_net_state().kept_bn_layers_size(); for (int idx = 0; idx < kept_bn_layers_num; ++idx) { this->kept_bn_layers_.push_back(param.compile_net_state().kept_bn_layers(idx)); } - param = compiled_param; #ifdef USE_MLSL NetParameter param_with_mn; From 28cf6b8d2eb81ca312f344a3f5a3f1cacc2ea53b Mon Sep 17 00:00:00 2001 From: "Shen, Haihao" Date: Wed, 28 Jun 2017 09:23:15 +0800 Subject: [PATCH 31/34] Fix Winograd fallback to direct convolution --- src/caffe/layers/mkldnn_convolution_layer.cpp | 62 ++++++++----------- 1 file changed, 25 insertions(+), 37 deletions(-) diff --git a/src/caffe/layers/mkldnn_convolution_layer.cpp b/src/caffe/layers/mkldnn_convolution_layer.cpp index a9649ad79..f6b79532b 100644 --- a/src/caffe/layers/mkldnn_convolution_layer.cpp +++ b/src/caffe/layers/mkldnn_convolution_layer.cpp @@ -196,26 +196,20 @@ void MKLDNNConvolutionLayer::InitConvolutionFwd(const vector* shared_ptr convReluFwd_desc; if(relu) convReluFwd_desc.reset(new convolution_relu_forward::desc(*convFwd_desc, negative_slope)); - try { - for(subEngineIndex=0; subEngineIndex < ep.getNumberOfSubEngines(); subEngineIndex++) { - try { - convFwd_pd.reset(new convolution_forward::primitive_desc(*convFwd_desc, - ep.getMKLDNNSubEngine(subEngineIndex))); - if(relu) convReluFwd_pd.reset(new convolution_relu_forward::primitive_desc(*convReluFwd_desc, - ep.getMKLDNNSubEngine(subEngineIndex))); - } - catch(...) 
{ - continue; - } - break; + for(subEngineIndex=0; subEngineIndex < ep.getNumberOfSubEngines(); subEngineIndex++) { + try { + convFwd_pd.reset(new convolution_forward::primitive_desc(*convFwd_desc, + ep.getMKLDNNSubEngine(subEngineIndex))); + if(relu) convReluFwd_pd.reset(new convolution_relu_forward::primitive_desc(*convReluFwd_desc, + ep.getMKLDNNSubEngine(subEngineIndex))); } - if ((!convFwd_pd) || (relu && !convReluFwd_pd)) - break; - } - catch(...) { - continue; + catch(...) { + continue; + } + break; } - break; + if ((convFwd_pd) && (!relu || convReluFwd_pd)) + break; } CHECK(convFwd_pd); @@ -378,27 +372,21 @@ void MKLDNNConvolutionLayer::InitConvolutionBwd(const vector* , init_bottom_md, init_weights_md, init_top_md , convolutionStrides, padding, padding, padding_kind::zero)); - try { - for(subEngineIndex=0; subEngineIndex < ep.getNumberOfSubEngines(); subEngineIndex++) { - try { - convBwdData_pd.reset(new convolution_backward_data::primitive_desc(*convBwdData_desc, - ep.getMKLDNNSubEngine(subEngineIndex), *convFwd_pd)); - - convBwdWeights_pd.reset(new convolution_backward_weights::primitive_desc(*convBwdWeights_desc, - ep.getMKLDNNSubEngine(subEngineIndex), *convFwd_pd)); - } - catch(...) { - continue; - } - break; + for(subEngineIndex=0; subEngineIndex < ep.getNumberOfSubEngines(); subEngineIndex++) { + try { + convBwdData_pd.reset(new convolution_backward_data::primitive_desc(*convBwdData_desc, + ep.getMKLDNNSubEngine(subEngineIndex), *convFwd_pd)); + + convBwdWeights_pd.reset(new convolution_backward_weights::primitive_desc(*convBwdWeights_desc, + ep.getMKLDNNSubEngine(subEngineIndex), *convFwd_pd)); } - if (!convBwdData_pd || !convBwdWeights_pd) - break; - } - catch(...) { - continue; + catch(...) 
{ + continue; + } + break; } - break; + if (convBwdData_pd && convBwdWeights_pd) + break; } CHECK(convBwdData_pd); From 792b0d18abcf0c171a09840d3567dbbd4d2b39e9 Mon Sep 17 00:00:00 2001 From: "Jin, Ge" Date: Wed, 28 Jun 2017 12:00:06 -0400 Subject: [PATCH 32/34] Fix bug in RemoveBNScale feature with GoogleNetv2 Currently, we don't handle fc--bn--scale structure, so the bn layers should be added to kept_bn_layers list. Signed-off-by: Jin, Ge --- src/caffe/net.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 184c99eb8..1b738c3d8 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -852,6 +852,9 @@ void Net::RemoveBNScale(const NetParameter& param, layers_to_drop.erase(layers_to_drop.find(layer_param->name())); } if (layer_included) { + if (layer_param->type().compare("BatchNorm") == 0) { + param_compiled->mutable_compile_net_state()->add_kept_bn_layers(layer_param->name()); + } param_compiled->add_layer()->CopyFrom(*layer_param); } } From 881163af5d04606f89b946a441e2b9d5a994dfb3 Mon Sep 17 00:00:00 2001 From: "Jin, Ge" Date: Mon, 3 Jul 2017 16:30:59 -0400 Subject: [PATCH 33/34] Fix bug in RemoveBNScale To handle "conv----bn" structure Signed-off-by: Jin, Ge --- src/caffe/net.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 1b738c3d8..a7bbe8170 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -839,7 +839,9 @@ void Net::RemoveBNScale(const NetParameter& param, shared_ptr scale_layer_param(new LayerParameter()); RecoverScaleFromBN(child_layer_param, *scale_layer_param, (Dtype)1, (Dtype)0); AdjustConvLayer(*layer_param, child_layer_param, *scale_layer_param, is_net_init); - } + } else { + AdjustConvLayer(*layer_param, child_layer_param, grandchild_layer_param, true); + } if (bn_scale_remove == false) bn_scale_remove = true; layers_to_drop.insert(child_layer_param.name()); } From 97cd241872c8bf0c94efdca29f279ac8a70bc801 Mon Sep 17 00:00:00 
2001 From: "Jin, Ge" Date: Tue, 4 Jul 2017 14:45:56 -0400 Subject: [PATCH 34/34] Fix bug in RemoveBNScale Signed-off-by: Jin, Ge --- src/caffe/net.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index a7bbe8170..19e3dd7e1 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -1280,8 +1280,8 @@ void Net::ShareTrainedLayersWith(const Net* other) { } //temp_net_param.mutable_compile_net_state()->set_bn_top_rename(other->bn_top_rename_); complete_net_param.CopyFrom(temp_net_param); - complete_net_param.clear_layer(); if (other->bn_scale_merge_) { + complete_net_param.clear_layer(); RecoverBNScaleMergedNet(&temp_net_param, &complete_net_param); } CopyTrainedLayersFrom(complete_net_param);