From d0dcb2e280ed38bfe937a22e485c33af374ce3cc Mon Sep 17 00:00:00 2001 From: "Zhang, Guoming" Date: Thu, 29 Jun 2017 14:35:45 +0800 Subject: [PATCH 01/54] Fix for ICL-146 Convnet benchmark segmentation fault with MKLDNN --- src/caffe/layers/mkldnn_convolution_layer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/caffe/layers/mkldnn_convolution_layer.cpp b/src/caffe/layers/mkldnn_convolution_layer.cpp index f6b79532b..f7469ba43 100644 --- a/src/caffe/layers/mkldnn_convolution_layer.cpp +++ b/src/caffe/layers/mkldnn_convolution_layer.cpp @@ -273,9 +273,9 @@ void MKLDNNConvolutionLayer::InitConvolutionFwd(const vector* , *fwd_top_data_memory)); } } - fwd_bottom_data->set_mkldnn_primitive(convFwd); //Wrong passed primitive! (TODO: Checking!) - //MKLDNNPrimitive fwd_bottom_data_primitive_transfer(fwd_bottom_data_primitive); - //fwd_bottom_data->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); + //fwd_bottom_data->set_mkldnn_primitive(convFwd); //Wrong passed primitive! (For sure!) + MKLDNNPrimitive fwd_bottom_data_primitive_transfer(fwd_bottom_data_primitive); + fwd_bottom_data->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); fwd_top_data->set_mkldnn_primitive(convFwd); From 8f33907843cd7e1ead138b7db6dec9bca4265f6b Mon Sep 17 00:00:00 2001 From: "Shen, Haihao" Date: Fri, 30 Jun 2017 16:21:45 +0800 Subject: [PATCH 02/54] Add tool to tune convolution algorithm (direct to winograd) --- examples/pycaffe/tune_model.py | 99 ++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 examples/pycaffe/tune_model.py diff --git a/examples/pycaffe/tune_model.py b/examples/pycaffe/tune_model.py new file mode 100644 index 000000000..d9350d69e --- /dev/null +++ b/examples/pycaffe/tune_model.py @@ -0,0 +1,99 @@ +import os +import datetime +import copy +import argparse + +from caffe.proto import caffe_pb2 +import google.protobuf.text_format as txtf +import caffe + +def isWinogradApplicable(ic, oc, stride, kernel_size): + if ic % 16 != 0: + return False + if oc % 16 != 0: + return False + if stride != 1: + return False + if kernel_size != 3: + return False + + return True + +def genHybridModel(net, winogradLayers, modelName): + newNet = copy.deepcopy(net) + newNetName = modelName.split(".")[0] + "_hybrid.prototxt" + for layer in winogradLayers: + newNet.layer[layer].convolution_param.conv_algorithm = "winograd" + with open(newNetName, 'w') as f: + f.write(str(newNet)) + print "[INFO] Complete model tuning with Winograd:", newNetName + +def tuneModelDefinition(model): + net = caffe_pb2.NetParameter() + with open(model) as f: + s = f.read() + txtf.Merge(s, net) + + net.name = 'Tuned model of ' + net.name + output_layer_map = {} + for index in range(0, len(net.layer)): + l = net.layer[index] + if l.type == ("Convolution"): + stride = 0 + kernel_size = 0 + if len(l.convolution_param.stride) == 0: + stride = 1 + else: + stride = l.convolution_param.stride[0] + kernel_size = l.convolution_param.kernel_size[0] + ic = 0 + if l.bottom[0] in output_layer_map.keys(): + ic = output_layer_map[l.bottom[0]][4] + oc = l.convolution_param.num_output + output_layer_map[l.name] = (index, stride, kernel_size, ic, oc, True) + elif l.type == ("InnerProduct"): + oc = l.inner_product_param.num_output + ic = 0 + if l.bottom[0] in output_layer_map.keys(): + ic = output_layer_map[l.bottom[0]][4] + output_layer_map[l.name] = (index, 0, 0, ic, oc, False) + elif l.type.endswith("Data"): + # TODO: correct the output + # dynamic_net = caffe.Net(model, caffe.TEST) + 
# for k, v in dynamic_net.blobs.items(): + # dynamic_net_map[k] = v.data.shape + ic = oc = 3 + output_layer_map[l.name] = (index, 0, 0, ic, oc, False) + else: + ic = 0 + if l.bottom[0] in output_layer_map.keys(): + ic = output_layer_map[l.bottom[0]][4] + oc = ic + output_layer_map[l.name] = (index, 0, 0, ic, oc, False) + + winograd_convolutions = [] + for k,v in output_layer_map.items(): + if v[5] and isWinogradApplicable(v[3], v[4], v[1], v[2]): + winograd_convolutions.append(v[0]) + + if len(winograd_convolutions) > 0: + genHybridModel(net, winograd_convolutions, model) + else: + print "[INFO] No need to tune model with Winograd:", model + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + + parser.add_argument('-m', '--model', action='store', dest='model', default="", + help='require the model definition (prototxt)') + + parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.0') + + params = parser.parse_args() + + model = params.model + if not os.path.exists(params.model): + print "[ERROR] Please specify the model definition file with -m" + exit(1) + + tuneModelDefinition(model) From a7d825680947371a4e540abc9e76c0f4f2fe3a31 Mon Sep 17 00:00:00 2001 From: "Shen, Haihao" Date: Sat, 1 Jul 2017 21:50:14 +0800 Subject: [PATCH 03/54] Add tool to tune optimal engine --- examples/pycaffe/tune_engine.py | 212 ++++++++++++++++++++++++++++++++ 1 file changed, 212 insertions(+) create mode 100755 examples/pycaffe/tune_engine.py diff --git a/examples/pycaffe/tune_engine.py b/examples/pycaffe/tune_engine.py new file mode 100755 index 000000000..371b74f70 --- /dev/null +++ b/examples/pycaffe/tune_engine.py @@ -0,0 +1,212 @@ +import os +import sys +import copy +import argparse + +from caffe.proto import caffe_pb2 +import google.protobuf.text_format as txtf + +def readFile(filePath): + lines = [] + file = open(filePath, 'r') + for line in file.readlines(): + lines.append(line) + file.close() + + return lines + +def writeFile(filePath, lines): + file = open(filePath, 'w+') + file.write(lines) + file.close() + +def parseLog(log): + lines = readFile(log) + model_start = False + time_start = False + model_lines = [] + time_lines = [] + for line in lines: + trim_line = line.strip() + if trim_line.endswith("Initializing net from parameters:"): + model_start = True + continue + if model_start: + if trim_line.find("Creating layer") <> -1: + model_start = False + continue + model_lines.append(line) + + if trim_line.endswith("Average time per layer:"): + time_start = True + continue + if time_start: + if trim_line.find("Average Forward pass") <> -1: + time_start = False + break + time_lines.append(line) + + model_lines = model_lines[1:] + model_str = "" + for line in model_lines: + model_str = model_str + line + + return (model_str, time_lines) + +def parseTimeLines(timeLines): + layer_map = {} + for line in timeLines: + trim_line = line.strip() + items = trim_line.split("\t") + layer_items = items[0].split(" ") + layer_name = layer_items[-1] + time_items = items[1].split(" ") + if layer_name not in layer_map.keys(): + layer_map[layer_name] = (float)(time_items[1]) + else: + layer_map[layer_name] = layer_map[layer_name] + (float)(time_items[1]) + + return layer_map + +def parseModelStr(modelStr): + net = caffe_pb2.NetParameter() + txtf.Merge(modelStr, net) + layer_model_map = {} + global_engine = "CAFFE" + if net.engine != "": + global_engine = net.engine + for index in range(0, len(net.layer)): + engine = global_engine + l = net.layer[index] + if l.engine != "": + engine = 
l.engine + param_engine = -1 + if l.type == "Convolution" or l.type == "Deconvolution": + if l.convolution_param.engine != "": + param_engine = l.convolution_param.engine + elif l.type == "BatchNorm": + if l.batch_norm_param.engine != "": + param_engine = l.batch_norm_param.engine + elif l.type == "Concat": + if l.concat_param.engine != "": + param_engine = l.concat_param.engine + elif l.type == "Eltwise": + if l.eltwise_param.engine != "": + param_engine = l.eltwise_param.engine + elif l.type == "InnerProduct": + if l.inner_product_param.engine != "": + param_engine = l.inner_product_param.engine + elif l.type == "LRN": + if l.lrn_param.engine != "": + param_engine = l.lrn_param.engine + elif l.type == "Pooling": + if l.pooling_param.engine != "": + param_engine = l.pooling_param.engine + elif l.type == "ReLU": + if l.relu_param.engine != "": + param_engine = l.relu_param.engine + + if param_engine == 0 or param_engine == 1: + engine = "CAFFE" + elif param_engine == 3: + engine = "MKL2017" + elif param_engine == 4: + engine = "MKLDNN" + layer_model_map[l.name] = (index, engine, l) + + return (net, layer_model_map) + +def selectOptimalEngine(layers): + optimal_layer = None + min_time = sys.float_info.max + for layer in layers: + if layer[2] < min_time: + min_time = layer[2] + optimal_layer = layer + + return optimal_layer + +def tuneEngine(logs, model): + if len(logs) <= 1: + print "[ERROR] Please specify two or more log files" + exit(1) + + for log in logs: + if not os.path.exists(log): + print "[ERROR] Please specify valid log file:", log + exit(1) + + layer_map = {} + net = None + for log in logs: + log_name = os.path.basename(log) + (model_str, time_lines) = parseLog(log) + (net, layer_model_map) = parseModelStr(model_str) + layer_time_map = parseTimeLines(time_lines) + for k, v in layer_model_map.items(): + if k not in layer_map.keys(): + layer_map[k] = [(v[0], v[1], layer_time_map[k], v[2])] + else: + layer_map_v = layer_map[k] + layer_map_v.append((v[0], v[1], layer_time_map[k], v[2])) + layer_map[k] = layer_map_v + + optimal_layer_map = {} + for k, v in layer_map.items(): + optimal_layer = selectOptimalEngine(v) + assert(optimal_layer != None) + # TODO: assign optimal layer object + optimal_layer_map[optimal_layer[0]] = optimal_layer[1] + + genModel(net, model, optimal_layer_map) + +def engineStrToInt(engine_str): + if engine_str == "CAFFE": + return 1 + elif engine_str == "MKL2017": + return 3 + else: + return 4 + +def genModel(net, model, optimal_layer_map): + new_net = copy.deepcopy(net) + for index in range(0, len(net.layer)): + l = new_net.layer[index] + if l.type.endswith("Data"): + continue + param_engine = engineStrToInt(optimal_layer_map[index]) + if l.type == "Convolution" or l.type == "Deconvolution": + l.convolution_param.engine = param_engine + if param_engine == 3: + l.convolution_param.conv_algorithm = "direct" + elif l.type == "BatchNorm": + l.batch_norm_param.engine = param_engine + elif l.type == "Concat": + l.concat_param.engine = param_engine + elif l.type == "Eltwise": + l.eltwise_param.engine = param_engine + elif l.type == "InnerProduct": + l.inner_product_param.engine = param_engine + elif l.type == "LRN": + l.lrn_param.engine = param_engine + elif l.type == "Pooling": + l.pooling_param.engine = param_engine + elif l.type == "ReLU": + l.relu_param.engine = param_engine + + with open(model, 'w') as f: + f.write(str(new_net)) + print "[INFO] Complete model engine tuning:", model + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + + 
parser.add_argument('-l', '--logs', nargs='+', help='require the caffe time logs', required=True) + + parser.add_argument('-o', '--output', action='store', dest='output', default="", + help='require the model output') + + parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.0') + + params = parser.parse_args() + tuneEngine(params.logs, params.output) From c984cba76f394e6272cce60a81dc64422d4d66ad Mon Sep 17 00:00:00 2001 From: "Shen, Haihao" Date: Sun, 2 Jul 2017 21:20:36 +0800 Subject: [PATCH 04/54] Use the layer object for engine tuning --- examples/pycaffe/tune_engine.py | 46 +++++++++------------------------ 1 file changed, 12 insertions(+), 34 deletions(-) diff --git a/examples/pycaffe/tune_engine.py b/examples/pycaffe/tune_engine.py index 371b74f70..850b94929 100755 --- a/examples/pycaffe/tune_engine.py +++ b/examples/pycaffe/tune_engine.py @@ -155,48 +155,26 @@ def tuneEngine(logs, model): for k, v in layer_map.items(): optimal_layer = selectOptimalEngine(v) assert(optimal_layer != None) - # TODO: assign optimal layer object - optimal_layer_map[optimal_layer[0]] = optimal_layer[1] + optimal_layer_map[optimal_layer[0]] = optimal_layer[3] genModel(net, model, optimal_layer_map) -def engineStrToInt(engine_str): - if engine_str == "CAFFE": - return 1 - elif engine_str == "MKL2017": - return 3 - else: - return 4 - def genModel(net, model, optimal_layer_map): - new_net = copy.deepcopy(net) + net_str = "" + net_str += "name: \"" + net.name + "\"\n" for index in range(0, len(net.layer)): - l = new_net.layer[index] + net_str += "layer {\n" + l = net.layer[index] if l.type.endswith("Data"): + net_str += str(l) + "\n}\n" continue - param_engine = engineStrToInt(optimal_layer_map[index]) - if l.type == "Convolution" or l.type == "Deconvolution": - l.convolution_param.engine = param_engine - if param_engine == 3: - l.convolution_param.conv_algorithm = "direct" - elif l.type == "BatchNorm": - l.batch_norm_param.engine = param_engine - elif l.type == "Concat": - l.concat_param.engine = param_engine - elif l.type == "Eltwise": - l.eltwise_param.engine = param_engine - elif l.type == "InnerProduct": - l.inner_product_param.engine = param_engine - elif l.type == "LRN": - l.lrn_param.engine = param_engine - elif l.type == "Pooling": - l.pooling_param.engine = param_engine - elif l.type == "ReLU": - l.relu_param.engine = param_engine - + l = optimal_layer_map[index] + net_str += str(l) + "\n}\n" with open(model, 'w') as f: - f.write(str(new_net)) - print "[INFO] Complete model engine tuning:", model + net = caffe_pb2.NetParameter() + txtf.Merge(net_str, net) + f.write(str(net)) + print "[INFO] Complete model engine tuning:", model if __name__ == '__main__': parser = argparse.ArgumentParser() From 50e4c2594e4033d827b5ad006c4f9d6d3df0a594 Mon Sep 17 00:00:00 2001 From: "Jin, Ge" Date: Mon, 3 Jul 2017 14:49:49 -0400 Subject: [PATCH 05/54] Fix bug in RemoveBNScale Handle "Conv---BN" structure in network init Signed-off-by: Jin, Ge --- src/caffe/net.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 1b738c3d8..ecb1a1779 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -839,6 +839,8 @@ void Net::RemoveBNScale(const NetParameter& param, shared_ptr scale_layer_param(new LayerParameter()); RecoverScaleFromBN(child_layer_param, *scale_layer_param, (Dtype)1, (Dtype)0); AdjustConvLayer(*layer_param, child_layer_param, *scale_layer_param, is_net_init); + } else { + AdjustConvLayer(*layer_param, child_layer_param, 
grandchild_layer_param, true); } if (bn_scale_remove == false) bn_scale_remove = true; layers_to_drop.insert(child_layer_param.name()); From dbdc8131f51d479e1888273b7d8be6e59f74b0db Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Tue, 4 Jul 2017 11:15:13 +0800 Subject: [PATCH 06/54] Fix the issue of Caffe time raised from Github. --- src/caffe/layers/mkldnn_convolution_layer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/caffe/layers/mkldnn_convolution_layer.cpp b/src/caffe/layers/mkldnn_convolution_layer.cpp index f7469ba43..1b9878fd6 100644 --- a/src/caffe/layers/mkldnn_convolution_layer.cpp +++ b/src/caffe/layers/mkldnn_convolution_layer.cpp @@ -473,9 +473,9 @@ void MKLDNNConvolutionLayer::InitConvolutionBwd(const vector* //MKLDNNPrimitive bwdw_bottom_data_primitive_transfer(bwdw_bottom_data_primitive); //bwdw_bottom_data->set_mkldnn_primitive(bwdw_bottom_data_primitive_transfer); - bwdw_top_diff->set_mkldnn_primitive(convBwdWeights); //Wrong passed primitive! (TODO: Checking!) - //MKLDNNPrimitive bwdw_top_diff_primitive_transfer(bwdw_top_diff_primitive); - //bwdw_top_diff->set_mkldnn_primitive(bwdw_top_diff_primitive_transfer); + //bwdw_top_diff->set_mkldnn_primitive(convBwdWeights); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwdw_top_diff_primitive_transfer(bwdw_top_diff_primitive); + bwdw_top_diff->set_mkldnn_primitive(bwdw_top_diff_primitive_transfer); bwdw_weights_diff->set_mkldnn_primitive(convBwdWeights); From 7189dc564f6933a0a4266e99d36b90b71e852016 Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Tue, 4 Jul 2017 13:25:48 +0800 Subject: [PATCH 07/54] Fix the wrong passed primitive in the MKLDNN convolution layer. --- src/caffe/layers/mkldnn_convolution_layer.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/caffe/layers/mkldnn_convolution_layer.cpp b/src/caffe/layers/mkldnn_convolution_layer.cpp index 1b9878fd6..75fac94c1 100644 --- a/src/caffe/layers/mkldnn_convolution_layer.cpp +++ b/src/caffe/layers/mkldnn_convolution_layer.cpp @@ -460,20 +460,20 @@ void MKLDNNConvolutionLayer::InitConvolutionBwd(const vector* bwdd_bottom_diff->set_mkldnn_primitive(convBwdData); - bwdd_top_diff->set_mkldnn_primitive(convBwdData); //Wrong passed primitive! (TODO: Checking!) - //MKLDNNPrimitive bwdd_top_diff_primitive_transfer(bwdd_top_diff_primitive); - //bwdd_top_diff->set_mkldnn_primitive(bwdd_top_diff_primitive_transfer); + //bwdd_top_diff->set_mkldnn_primitive(convBwdData); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwdd_top_diff_primitive_transfer(bwdd_top_diff_primitive); + bwdd_top_diff->set_mkldnn_primitive(bwdd_top_diff_primitive_transfer); //bwdd_weights_data->set_mkldnn_primitive(convBwdData); //Wrong passed primitive! (For sure!) MKLDNNPrimitive bwdd_weights_data_primitive_transfer(bwdd_weights_data_primitive); bwdd_weights_data->set_mkldnn_primitive(bwdd_weights_data_primitive_transfer); - bwdw_bottom_data->set_mkldnn_primitive(convBwdWeights); //Wrong passed primitive! (TODO: Checking!) - //MKLDNNPrimitive bwdw_bottom_data_primitive_transfer(bwdw_bottom_data_primitive); - //bwdw_bottom_data->set_mkldnn_primitive(bwdw_bottom_data_primitive_transfer); + //bwdw_bottom_data->set_mkldnn_primitive(convBwdWeights); //Wrong passed primitive! (TODO: Checking!) 
+ MKLDNNPrimitive bwdw_bottom_data_primitive_transfer(bwdw_bottom_data_primitive); + bwdw_bottom_data->set_mkldnn_primitive(bwdw_bottom_data_primitive_transfer); - //bwdw_top_diff->set_mkldnn_primitive(convBwdWeights); //Wrong passed primitive! (TODO: Checking!) + //bwdw_top_diff->set_mkldnn_primitive(convBwdWeights); //Wrong passed primitive! (For sure!) MKLDNNPrimitive bwdw_top_diff_primitive_transfer(bwdw_top_diff_primitive); bwdw_top_diff->set_mkldnn_primitive(bwdw_top_diff_primitive_transfer); From a0fdbf07bb15e6e357e80f63541257b3a4f646a3 Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Wed, 5 Jul 2017 00:57:10 +0800 Subject: [PATCH 08/54] Update and refine the shell script to download the MLSL release and handle the old version of MLSL. --- external/mlsl/prepare_mlsl.sh | 57 +++++++++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 9 deletions(-) diff --git a/external/mlsl/prepare_mlsl.sh b/external/mlsl/prepare_mlsl.sh index 72fdb95f5..0d56d7200 100755 --- a/external/mlsl/prepare_mlsl.sh +++ b/external/mlsl/prepare_mlsl.sh @@ -25,25 +25,58 @@ fi if [ -z $VERSION_LINE ]; then VERSION_LINE=0 fi -if [ -z "$(echo $VERSION_LINE | sed -n "/^[0-9]\+$/p")" ];then +if [ -z "$(echo $VERSION_LINE | sed -n "/^[0-9]\+$/p")" ]; then #echo "[Debug] VERSION_LINE value contains other string or flags, not only numbers" VERSION_LINE=0 fi echo $VERSION_LINE # Return Version Line } +# Clean up the previous MLSL version +CleanUpPreviousMLSL2017_0_014() +{ +OLD_ARCHIVE_TARGZ=files.tar.gz +OLD_INSTALL_SHELL=install.sh +OLD_ARCHIVE_BASENAME=l_mlsl_p_2017.0.014.tgz +OLD_ARCHIVE_INSTALL_FOLDERNAME=l_mlsl_p_2017.0.014 +if [ -f $ABS_DST/$OLD_ARCHIVE_TARGZ ]; then + rm $ABS_DST/$OLD_ARCHIVE_TARGZ + #echo "[Debug] Delete old files.tar.gz!" +fi +if [ -f $ABS_DST/$OLD_INSTALL_SHELL ]; then + rm $ABS_DST/$OLD_INSTALL_SHELL + #echo "[Debug] Delete old install.sh file!" +fi +if [ -f $ABS_DST/$OLD_ARCHIVE_BASENAME ]; then + rm $ABS_DST/$OLD_ARCHIVE_BASENAME + #echo "[Debug] Delete old l_mlsl_p_2017.0.014.tgz file!" +fi +if [ -d $ABS_DST/$OLD_ARCHIVE_INSTALL_FOLDERNAME ]; then + rm -rf $ABS_DST/$OLD_ARCHIVE_INSTALL_FOLDERNAME + #echo "[Debug] Delete old l_mlsl_p_2017.0.014 folder!" +fi +} + # MLSL DST=`dirname $0` #echo "[Debug] dirname: $0" #echo "[Debug] DST value: $DST" ABS_DST=`readlink -f $DST` #echo "[Debug] ABS_DST value: $ABS_DST" -VERSION_MATCH=20170014 -ARCHIVE_BASENAME=l_mlsl_p_2017.0.014.tgz -ARCHIVE_INSTALL_FOLDERNAME=l_mlsl_p_2017.0.014 + +if [ -z $MLSL_ROOT ]; then + CleanUpPreviousMLSL2017_0_014 +fi + +VERSION_MATCH=20171016 +ARCHIVE_BASENAME=l_mlsl_2017.1.016.tgz +ARCHIVE_INSTALL_FOLDERNAME=l_mlsl_2017.1.016 +#because the l_mlsl_2017.1.016.tgz will unpacked files.tar.gz and install.sh to the ARCHIVE_INSTALL_FOLDERNAME +#not unpacked to the DST folder (Different behavior against l_mlsl_p_2017.0.014.tgz) +ARCHIVE_INSTALL_FOLDERNAME_TEMP=l_mlsl_2017.1.016_temp MLSL_CONTENT_DIR=`echo $ARCHIVE_BASENAME | rev | cut -d "." 
-f 2- | rev` #echo "[Debug] MLSL_CONTENT_DIR value: $MLSL_CONTENT_DIR" -GITHUB_RELEASE_TAG=v2017-Preview +GITHUB_RELEASE_TAG=v2017.1-Preview MLSLURL="https://github.com/01org/MLSL/releases/download/$GITHUB_RELEASE_TAG/$ARCHIVE_BASENAME" #echo "[Debug] MLSL_ROOT value: $MLSL_ROOT" @@ -65,15 +98,21 @@ if [ -z $MLSL_ROOT ] || [ $VERSION_LINE -lt $VERSION_MATCH ]; then #echo "[Debug] VERSION_LINE value inside if: $VERSION_LINE" #if MLSL_ROOT is not set - if [ -z $MLSL_ROOT ] ; then + if [ -z $MLSL_ROOT ]; then #if version is not given, or the version is lower than expected version - if [ $VERSION_LINE -lt $VERSION_MATCH ] ; then + if [ $VERSION_LINE -lt $VERSION_MATCH ]; then #Then downloaded, unpacked and installed wget --no-check-certificate -P $DST $MLSLURL -O $DST/$ARCHIVE_BASENAME - tar -xzf $DST/$ARCHIVE_BASENAME -C $DST + if [ ! -d $DST/$ARCHIVE_INSTALL_FOLDERNAME_TEMP ]; then + mkdir $DST/$ARCHIVE_INSTALL_FOLDERNAME_TEMP + #echo "[Debug] Create l_mlsl_2017.1.016_temp folder for unpacking!" + fi + tar -xzf $DST/$ARCHIVE_BASENAME -C $DST/$ARCHIVE_INSTALL_FOLDERNAME_TEMP #echo "[Debug] PWD value: $PWD" #install.sh did not support the relative path as the parameter - bash $DST/install.sh -s -d $ABS_DST/$ARCHIVE_INSTALL_FOLDERNAME + bash $DST/$ARCHIVE_INSTALL_FOLDERNAME_TEMP/$ARCHIVE_INSTALL_FOLDERNAME/install.sh -s -d $ABS_DST/$ARCHIVE_INSTALL_FOLDERNAME + rm -rf $DST/$ARCHIVE_INSTALL_FOLDERNAME_TEMP + #echo "[Debug] Remove l_mlsl_2017.1.016_temp folder for unpacking!" fi #else: version is just our expected version, no need to donload again, but need to set the MLSL_ROOT #do not change the value of MLSL_ROOT if MLSL_ROOT is set, but version is not given From e3ff0c8666bbbdf45e4e7f767b40ae06bed69658 Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Wed, 5 Jul 2017 10:07:24 +0800 Subject: [PATCH 09/54] Fix the wrong passed memory in the MKLDNN convolution layer. --- src/caffe/layers/mkldnn_convolution_layer.cpp | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/caffe/layers/mkldnn_convolution_layer.cpp b/src/caffe/layers/mkldnn_convolution_layer.cpp index 75fac94c1..d65dbf3bf 100644 --- a/src/caffe/layers/mkldnn_convolution_layer.cpp +++ b/src/caffe/layers/mkldnn_convolution_layer.cpp @@ -261,7 +261,9 @@ void MKLDNNConvolutionLayer::InitConvolutionFwd(const vector* , *fwd_bottom_data_primitive, *fwd_weights_data_primitive , *fwd_bias_data_primitive, *fwd_top_data_memory)); } - fwd_bias_data->set_mkldnn_primitive(convFwd); + //fwd_bias_data->set_mkldnn_primitive(convFwd); //Wrong passed primitive! (For sure!) + MKLDNNPrimitive fwd_bias_data_primitive_transfer(fwd_bias_data_primitive); + fwd_bias_data->set_mkldnn_primitive(fwd_bias_data_primitive_transfer); } else { if(relu) { convFwd.reset(new convolution_relu_forward(*convReluFwd_pd @@ -277,7 +279,9 @@ void MKLDNNConvolutionLayer::InitConvolutionFwd(const vector* MKLDNNPrimitive fwd_bottom_data_primitive_transfer(fwd_bottom_data_primitive); fwd_bottom_data->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); - fwd_top_data->set_mkldnn_primitive(convFwd); + //fwd_top_data->set_mkldnn_primitive(convFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive fwd_top_data_memory_transfer(fwd_top_data_memory); + fwd_top_data->set_mkldnn_primitive(fwd_top_data_memory_transfer); //fwd_weights_data->set_mkldnn_primitive(convFwd); //Wrong passed primitive! (For sure!) 
MKLDNNPrimitive fwd_weights_data_primitive_transfer(fwd_weights_data_primitive); @@ -447,7 +451,9 @@ void MKLDNNConvolutionLayer::InitConvolutionBwd(const vector* , *bwdw_bottom_data_primitive, *bwdw_top_diff_primitive , *bwdw_weights_diff_memory, *bwdw_bias_diff_memory)); - bwdw_bias_diff->set_mkldnn_primitive(convBwdWeights); + //bwdw_bias_diff->set_mkldnn_primitive(convBwdWeights); //Wrong passed primitive! (For sure!) + MKLDNNPrimitive bwdw_bias_diff_memory_transfer(bwdw_bias_diff_memory); + bwdw_bias_diff->set_mkldnn_primitive(bwdw_bias_diff_memory_transfer); } else { convBwdWeights.reset(new convolution_backward_weights(*convBwdWeights_pd , *bwdw_bottom_data_primitive, *bwdw_top_diff_primitive @@ -458,7 +464,9 @@ void MKLDNNConvolutionLayer::InitConvolutionBwd(const vector* , *bwdd_top_diff_primitive, *bwdd_weights_data_primitive , *bwdd_bottom_diff_memory)); - bwdd_bottom_diff->set_mkldnn_primitive(convBwdData); + //bwdd_bottom_diff->set_mkldnn_primitive(convBwdData); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwdd_bottom_diff_memory_transfer(bwdd_bottom_diff_memory); + bwdd_bottom_diff->set_mkldnn_primitive(bwdd_bottom_diff_memory_transfer); //bwdd_top_diff->set_mkldnn_primitive(convBwdData); //Wrong passed primitive! (TODO: Checking!) MKLDNNPrimitive bwdd_top_diff_primitive_transfer(bwdd_top_diff_primitive); @@ -477,7 +485,9 @@ void MKLDNNConvolutionLayer::InitConvolutionBwd(const vector* MKLDNNPrimitive bwdw_top_diff_primitive_transfer(bwdw_top_diff_primitive); bwdw_top_diff->set_mkldnn_primitive(bwdw_top_diff_primitive_transfer); - bwdw_weights_diff->set_mkldnn_primitive(convBwdWeights); + //bwdw_weights_diff->set_mkldnn_primitive(convBwdWeights); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwdw_weights_diff_memory_transfer(bwdw_weights_diff_memory); + bwdw_weights_diff->set_mkldnn_primitive(bwdw_weights_diff_memory_transfer); // Names are for debugging purposes only. } From 63f05ebc56c9ab759ae9c54cb155d1c160c0cfff Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Wed, 5 Jul 2017 12:31:44 +0800 Subject: [PATCH 10/54] Fix the wrong passed primitive and memory in the MKLDNN innerproduct layer. --- .../layers/mkldnn_inner_product_layer.cpp | 80 +++++++++++-------- 1 file changed, 47 insertions(+), 33 deletions(-) diff --git a/src/caffe/layers/mkldnn_inner_product_layer.cpp b/src/caffe/layers/mkldnn_inner_product_layer.cpp index d2fe6cfaa..1c92669c1 100644 --- a/src/caffe/layers/mkldnn_inner_product_layer.cpp +++ b/src/caffe/layers/mkldnn_inner_product_layer.cpp @@ -235,18 +235,24 @@ void MKLDNNInnerProductLayer::InitInnerProductFwd(const vectorset_mkldnn_primitive(ipFwd); //Wrong passed primitive! (TODO: Checking!) - //MKLDNNPrimitive fwd_bottom_data_primitive_transfer(fwd_bottom_data_primitive); - //fwd_bottom_data->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); + //fwd_bottom_data->set_mkldnn_primitive(ipFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive fwd_bottom_data_primitive_transfer(fwd_bottom_data_primitive); + fwd_bottom_data->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); - fwd_top_data->set_mkldnn_primitive(ipFwd); - - fwd_weights_data->set_mkldnn_primitive(ipFwd); //Wrong passed primitive! (TODO: Checking!) - //MKLDNNPrimitive fwd_weights_data_primitive_transfer(fwd_weights_data_primitive); - //fwd_weights_data->set_mkldnn_primitive(fwd_weights_data_primitive_transfer); + //fwd_top_data->set_mkldnn_primitive(ipFwd); //Wrong passed primitive! (TODO: Checking!) 
+ MKLDNNPrimitive fwd_top_data_memory_transfer(fwd_top_data_memory); + fwd_top_data->set_mkldnn_primitive(fwd_top_data_memory_transfer); - if (this->bias_term_) - fwd_bias_data->set_mkldnn_primitive(ipFwd); + //fwd_weights_data->set_mkldnn_primitive(ipFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive fwd_weights_data_primitive_transfer(fwd_weights_data_primitive); + fwd_weights_data->set_mkldnn_primitive(fwd_weights_data_primitive_transfer); + + if (this->bias_term_) + { + //fwd_bias_data->set_mkldnn_primitive(ipFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive fwd_bias_data_primitive_transfer(fwd_bias_data_primitive); + fwd_bias_data->set_mkldnn_primitive(fwd_bias_data_primitive_transfer); + } } template @@ -416,29 +422,37 @@ void MKLDNNInnerProductLayer::InitInnerProductBwd(const vectorset_mkldnn_primitive(ipBwdData); + //bwdd_bottom_diff->set_mkldnn_primitive(ipBwdData); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwdd_bottom_diff_memory_transfer(bwdd_bottom_diff_memory); + bwdd_bottom_diff->set_mkldnn_primitive(bwdd_bottom_diff_memory_transfer); - bwdd_top_diff->set_mkldnn_primitive(ipBwdData); //Wrong passed primitive! (TODO: Checking!) - //MKLDNNPrimitive bwdd_top_diff_primitive_transfer(bwdd_top_diff_primitive); - //bwdd_top_diff->set_mkldnn_primitive(bwdd_top_diff_primitive_transfer); + //bwdd_top_diff->set_mkldnn_primitive(ipBwdData); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwdd_top_diff_primitive_transfer(bwdd_top_diff_primitive); + bwdd_top_diff->set_mkldnn_primitive(bwdd_top_diff_primitive_transfer); - bwdd_weights_data->set_mkldnn_primitive(ipBwdData); //Wrong passed primitive! (TODO: Checking!) - //MKLDNNPrimitive bwdd_weights_data_primitive_transfer(bwdd_weights_data_primitive); - //bwdd_weights_data->set_mkldnn_primitive(bwdd_weights_data_primitive_transfer); + //bwdd_weights_data->set_mkldnn_primitive(ipBwdData); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwdd_weights_data_primitive_transfer(bwdd_weights_data_primitive); + bwdd_weights_data->set_mkldnn_primitive(bwdd_weights_data_primitive_transfer); - bwdw_bottom_data->set_mkldnn_primitive(ipBwdWeights); //Wrong passed primitive! (TODO: Checking!) - //MKLDNNPrimitive bwdw_bottom_data_primitive_transfer(bwdw_bottom_data_primitive); - //bwdw_bottom_data->set_mkldnn_primitive(bwdw_bottom_data_primitive_transfer); + //bwdw_bottom_data->set_mkldnn_primitive(ipBwdWeights); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwdw_bottom_data_primitive_transfer(bwdw_bottom_data_primitive); + bwdw_bottom_data->set_mkldnn_primitive(bwdw_bottom_data_primitive_transfer); - bwdw_top_diff->set_mkldnn_primitive(ipBwdWeights); //Wrong passed primitive! (TODO: Checking!) - //MKLDNNPrimitive bwdw_top_diff_primitive_transfer(bwdw_top_diff_primitive); - //bwdw_top_diff->set_mkldnn_primitive(bwdw_top_diff_primitive_transfer); + //bwdw_top_diff->set_mkldnn_primitive(ipBwdWeights); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwdw_top_diff_primitive_transfer(bwdw_top_diff_primitive); + bwdw_top_diff->set_mkldnn_primitive(bwdw_top_diff_primitive_transfer); - bwdw_weights_diff->set_mkldnn_primitive(ipBwdWeights); + //bwdw_weights_diff->set_mkldnn_primitive(ipBwdWeights); //Wrong passed primitive! (TODO: Checking!) 
+ MKLDNNPrimitive bwdw_weights_diff_memory_transfer(bwdw_weights_diff_memory); + bwdw_weights_diff->set_mkldnn_primitive(bwdw_weights_diff_memory_transfer); if (this->bias_term_) - bwdw_bias_diff->set_mkldnn_primitive(ipBwdWeights); + { + //bwdw_bias_diff->set_mkldnn_primitive(ipBwdWeights); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwdw_bias_diff_memory_transfer(bwdw_bias_diff_memory); + bwdw_bias_diff->set_mkldnn_primitive(bwdw_bias_diff_memory_transfer); + } } @@ -482,9 +496,9 @@ void MKLDNNInnerProductLayer::Backward_cpu(const vector*>& to else { LOG(INFO) << "Debug: Top prv diff is NULL!"; - LOG(INFO) << "Debug: Top cpu diff: " << *top[0]->cpu_diff(); - } - + LOG(INFO) << "Debug: Top cpu diff: " << *top[0]->cpu_diff(); + } + if (this->blobs_[0]->prv_data() != NULL) { LOG(INFO) << "Debug: Weights prv data from blobs_[0]: " << *this->blobs_[0]->prv_data(); @@ -492,9 +506,9 @@ void MKLDNNInnerProductLayer::Backward_cpu(const vector*>& to else { LOG(INFO) << "Debug: Weights prv data is NULL!"; - LOG(INFO) << "Debug: Weights cpu data: " << *this->blobs_[0]->cpu_data(); - } - //Before submit, so get_prv_ptr() always has the value + LOG(INFO) << "Debug: Weights cpu data: " << *this->blobs_[0]->cpu_data(); + } + //Before submit, so get_prv_ptr() always has the value LOG(INFO) << "Debug: Weights prv data from get_prv_ptr: " << *bwdd_weights_data->get_prv_ptr(); #endif ipBwdData.submit(); @@ -505,8 +519,8 @@ void MKLDNNInnerProductLayer::Backward_cpu(const vector*>& to } else { - LOG(INFO) << "Debug: Bottom prv diff is NULL!"; - LOG(INFO) << "Debug: Bottom cpu diff: " << *bottom[0]->cpu_diff(); + LOG(INFO) << "Debug: Bottom prv diff is NULL!"; + LOG(INFO) << "Debug: Bottom cpu diff: " << *bottom[0]->cpu_diff(); } #endif PERFORMANCE_MEASUREMENT_END_ID(perf_id_bw_); From 2311687ddc7c98f74b19ddae454233b782001af1 Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Wed, 5 Jul 2017 16:44:06 +0800 Subject: [PATCH 11/54] Fix the wrong passed primitive and memory in the MKLDNN pooling layer. --- src/caffe/layers/mkldnn_pooling_layer.cpp | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/caffe/layers/mkldnn_pooling_layer.cpp b/src/caffe/layers/mkldnn_pooling_layer.cpp index 849abd0f3..2ed89f98e 100644 --- a/src/caffe/layers/mkldnn_pooling_layer.cpp +++ b/src/caffe/layers/mkldnn_pooling_layer.cpp @@ -275,7 +275,7 @@ void MKLDNNPoolingLayer::InitPoolingFwd(const vector*>& botto fwd_top_data.reset(new MKLDNNData(usr_top_data_mpd, prv_fwd_top_data_mpd, top[0], this)); fwd_top_data_memory = fwd_top_data->create_output_memory(); - if ( propagation == prop_kind::forward_training && + if (propagation == prop_kind::forward_training && pooling_algorithm != algorithm::pooling_avg_exclude_padding && pooling_algorithm != algorithm::pooling_avg_include_padding) { indices_pd.reset(new MemPD(poolingFwd_pd->workspace_primitive_desc())); @@ -284,8 +284,13 @@ void MKLDNNPoolingLayer::InitPoolingFwd(const vector*>& botto } else { poolingFwd.reset(new pooling_forward(*poolingFwd_pd, *fwd_bottom_data_primitive, *fwd_top_data_memory)); } - fwd_bottom_data->set_mkldnn_primitive(poolingFwd); - fwd_top_data->set_mkldnn_primitive(poolingFwd); + //fwd_bottom_data->set_mkldnn_primitive(poolingFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive fwd_bottom_data_primitive_transfer(fwd_bottom_data_primitive); + fwd_bottom_data->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); + + //fwd_top_data->set_mkldnn_primitive(poolingFwd); //Wrong passed primitive! 
(TODO: Checking!) + MKLDNNPrimitive fwd_top_data_memory_transfer(fwd_top_data_memory); + fwd_top_data->set_mkldnn_primitive(fwd_top_data_memory_transfer); } // TODO(Yangqing): Is there a faster way to do pooling in the channel-first @@ -440,8 +445,13 @@ void MKLDNNPoolingLayer::InitPoolingBwd(const vector*>& top else poolingBwd.reset(new pooling_backward(*poolingBwd_pd, *bwd_top_diff_primitive, *bwd_bottom_diff_memory)); - bwd_bottom_diff->set_mkldnn_primitive(poolingBwd); - bwd_top_diff->set_mkldnn_primitive(poolingBwd); + //bwd_bottom_diff->set_mkldnn_primitive(poolingBwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwd_bottom_diff_memory_transfer(bwd_bottom_diff_memory); + bwd_bottom_diff->set_mkldnn_primitive(bwd_bottom_diff_memory_transfer); + + //bwd_top_diff->set_mkldnn_primitive(poolingBwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwd_top_diff_primitive_transfer(bwd_top_diff_primitive); + bwd_top_diff->set_mkldnn_primitive(bwd_top_diff_primitive_transfer); } template From 3b8bec66a90725a6805915b729a8f88a0f679385 Mon Sep 17 00:00:00 2001 From: "Gong, Jiong" Date: Wed, 5 Jul 2017 18:01:22 +0800 Subject: [PATCH 12/54] use prv diff instead of cpu diff when computation sgd update --- include/caffe/multinode/multi_sync.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/caffe/multinode/multi_sync.hpp b/include/caffe/multinode/multi_sync.hpp index 2d4c566ae..d466abfa7 100644 --- a/include/caffe/multinode/multi_sync.hpp +++ b/include/caffe/multinode/multi_sync.hpp @@ -63,7 +63,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace caffe { -#define CAN_USE_PRV(param) false //(param->prv_diff() && (param->prv_diff_count() == param->count())) +#define CAN_USE_PRV(param) (param->prv_diff()) template class MultiSync : public MultiSolver::Callback { From d40f849639632c66abddb3cc29705874deff5b80 Mon Sep 17 00:00:00 2001 From: "Gong, Jiong" Date: Wed, 5 Jul 2017 22:41:29 +0800 Subject: [PATCH 13/54] add count check --- include/caffe/multinode/multi_sync.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/caffe/multinode/multi_sync.hpp b/include/caffe/multinode/multi_sync.hpp index d466abfa7..08c8aed91 100644 --- a/include/caffe/multinode/multi_sync.hpp +++ b/include/caffe/multinode/multi_sync.hpp @@ -63,7 +63,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace caffe { -#define CAN_USE_PRV(param) (param->prv_diff()) +#define CAN_USE_PRV(param) (param->prv_diff() && (param->prv_diff_count() == param->count())) template class MultiSync : public MultiSolver::Callback { From 98d6ece219f4c39ff51d1983394cd9e9ba76a20f Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Wed, 5 Jul 2017 22:42:10 +0800 Subject: [PATCH 14/54] Fix the wrong passed primitive and memory in the MKLDNN relu layer. 
--- src/caffe/layers/mkldnn_relu_layer.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/caffe/layers/mkldnn_relu_layer.cpp b/src/caffe/layers/mkldnn_relu_layer.cpp index 273e834d8..c1b63a053 100644 --- a/src/caffe/layers/mkldnn_relu_layer.cpp +++ b/src/caffe/layers/mkldnn_relu_layer.cpp @@ -129,9 +129,13 @@ void MKLDNNReLULayer::InitReLUFwd(const vector*>& bottom, con fwd_top_data_memory = fwd_top_data->create_output_memory(inplace); reluFwd.reset(new relu_forward(*reluFwd_pd, *fwd_bottom_data_primitive, *fwd_top_data_memory)); - fwd_bottom_data->set_mkldnn_primitive(reluFwd); - fwd_top_data->set_mkldnn_primitive(reluFwd); + //fwd_bottom_data->set_mkldnn_primitive(reluFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive fwd_bottom_data_primitive_transfer(fwd_bottom_data_primitive); + fwd_bottom_data->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); + //fwd_top_data->set_mkldnn_primitive(reluFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive fwd_top_data_memory_transfer(fwd_top_data_memory); + fwd_top_data->set_mkldnn_primitive(fwd_top_data_memory_transfer); } @@ -269,8 +273,13 @@ void MKLDNNReLULayer::InitReLUBwd(const vector*>& top bwd_bottom_diff_memory = bwd_bottom_diff->create_output_memory(inplace); reluBwd.reset(new relu_backward(*reluBwd_pd, *fwd_bottom_data_primitive, *bwd_top_diff_primitive, *bwd_bottom_diff_memory)); - bwd_top_diff->set_mkldnn_primitive(reluBwd); - bwd_bottom_diff->set_mkldnn_primitive(reluBwd); + //bwd_top_diff->set_mkldnn_primitive(reluBwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwd_top_diff_primitive_transfer(bwd_top_diff_primitive); + bwd_top_diff->set_mkldnn_primitive(bwd_top_diff_primitive_transfer); + + //bwd_bottom_diff->set_mkldnn_primitive(reluBwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwd_bottom_diff_memory_transfer(bwd_bottom_diff_memory); + bwd_bottom_diff->set_mkldnn_primitive(bwd_bottom_diff_memory_transfer); } template From 37adb20079c1d77836becd83568349dae20ca884 Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Thu, 6 Jul 2017 08:48:04 +0800 Subject: [PATCH 15/54] Fix the wrong passed primitive and memory in the MKLDNN batch norm layer. --- src/caffe/layers/mkldnn_batch_norm_layer.cpp | 22 ++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/caffe/layers/mkldnn_batch_norm_layer.cpp b/src/caffe/layers/mkldnn_batch_norm_layer.cpp index dd1b7f7b6..4db92b943 100644 --- a/src/caffe/layers/mkldnn_batch_norm_layer.cpp +++ b/src/caffe/layers/mkldnn_batch_norm_layer.cpp @@ -246,8 +246,13 @@ void MKLDNNBatchNormLayer::InitBatchNorm(const vector*>& bott } } - fwd_bottom_data->set_mkldnn_primitive(BatchNormFwd); - fwd_top_data->set_mkldnn_primitive(BatchNormFwd); + //fwd_bottom_data->set_mkldnn_primitive(BatchNormFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive fwd_bottom_data_primitive_transfer(input_primitive); + fwd_bottom_data->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); + + //fwd_top_data->set_mkldnn_primitive(BatchNormFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive fwd_top_data_memory_transfer(output_memory); + fwd_top_data->set_mkldnn_primitive(fwd_top_data_memory_transfer); //Fix: MKLDNN batch norm only support 4D memory descriptor! Use 4D for calculation and reshape to 2D for output! 
bool has_spatial = (bottom[0]->shape().size() != 2); @@ -259,8 +264,8 @@ void MKLDNNBatchNormLayer::InitBatchNorm(const vector*>& bott #ifdef DEBUG LOG(INFO) << "size of bottom blob: " << bottom[0]->shape().size(); LOG(INFO) << "MKLDNN batch norm only support 4D memory descriptor! Use 4D for calculation and reshape to 2D for output!"; -#endif - vector top_shape; +#endif + vector top_shape; top_shape.push_back(bottom[0]->num()); top_shape.push_back(bottom[0]->channels()); top[0]->Reshape(top_shape); @@ -413,8 +418,13 @@ void MKLDNNBatchNormLayer::InitBatchNormBwd( *bwd_top_diff_primitive, *bwd_bottom_diff_memory)); } - bwd_top_diff->set_mkldnn_primitive(BatchNormBwd); - bwd_bottom_diff->set_mkldnn_primitive(BatchNormBwd); + //bwd_top_diff->set_mkldnn_primitive(BatchNormBwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwd_top_diff_primitive_transfer(bwd_top_diff_primitive); + bwd_top_diff->set_mkldnn_primitive(bwd_top_diff_primitive_transfer); + + //bwd_bottom_diff->set_mkldnn_primitive(BatchNormBwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwd_bottom_diff_memory_transfer(bwd_bottom_diff_memory); + bwd_bottom_diff->set_mkldnn_primitive(bwd_bottom_diff_memory_transfer); } template From 77dc92ff58f53a894174bc1a9bb4af468ffa81d8 Mon Sep 17 00:00:00 2001 From: Feng Tian Date: Tue, 4 Jul 2017 11:22:53 +0800 Subject: [PATCH 16/54] sync BVLC latest codes in "python" dir except GPU changes to Intel Caffe Change-Id: Ide342386987c66616da22def9dbcccc342b8bd52 --- .../train_test_singleFrame_RGB.prototxt | 2 - python/CMakeLists.txt | 6 +-- python/caffe/__init__.py | 2 +- python/caffe/_caffe.cpp | 21 ++++++++ python/caffe/draw.py | 6 +-- python/caffe/io.py | 4 +- python/caffe/net_spec.py | 4 ++ python/caffe/pycaffe.py | 15 +++++- python/caffe/test/test_draw.py | 37 ++++++++++++++ python/caffe/test/test_net.py | 51 ++++++++++++++----- python/caffe/test/test_net_spec.py | 8 +++ 11 files changed, 130 insertions(+), 26 deletions(-) create mode 100644 python/caffe/test/test_draw.py diff --git a/examples/LRCN_activity_recognition/train_test_singleFrame_RGB.prototxt b/examples/LRCN_activity_recognition/train_test_singleFrame_RGB.prototxt index 8663afe45..26e4ddc55 100644 --- a/examples/LRCN_activity_recognition/train_test_singleFrame_RGB.prototxt +++ b/examples/LRCN_activity_recognition/train_test_singleFrame_RGB.prototxt @@ -13,7 +13,6 @@ layer { mean_value: 103.939 mean_value: 116.779 mean_value: 123.68 - flow: false } image_data_param { source: "ucf101_singleFrame_RGB_train_split1.txt" @@ -38,7 +37,6 @@ layer { mean_value: 103.939 mean_value: 116.779 mean_value: 123.68 - flow: false } image_data_param { source: "ucf101_singleFrame_RGB_test_split1.txt" diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index bf492a24b..c53299d26 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -3,13 +3,13 @@ if(NOT HAVE_PYTHON) return() endif() -include_directories(${PYTHON_INCLUDE_DIRS} ${NUMPY_INCLUDE_DIR} ${Boost_INCLUDE_DIRS}) file(GLOB_RECURSE python_srcs ${PROJECT_SOURCE_DIR}/python/*.cpp) add_library(pycaffe SHARED ${python_srcs}) -target_link_libraries(pycaffe ${Caffe_LINK} ${PYTHON_LIBRARIES} ${Boost_LIBRARIES}) -set_target_properties(pycaffe PROPERTIES PREFIX "" OUTPUT_NAME "_caffe") caffe_default_properties(pycaffe) +set_target_properties(pycaffe PROPERTIES PREFIX "" OUTPUT_NAME "_caffe") +target_include_directories(pycaffe PUBLIC ${PYTHON_INCLUDE_DIRS} ${NUMPY_INCLUDE_DIR}) +target_link_libraries(pycaffe PUBLIC ${Caffe_LINK} ${PYTHON_LIBRARIES}) 
if(UNIX OR APPLE) set(__linkname "${PROJECT_SOURCE_DIR}/python/caffe/_caffe.so") diff --git a/python/caffe/__init__.py b/python/caffe/__init__.py index a823b52e8..34c939a5b 100755 --- a/python/caffe/__init__.py +++ b/python/caffe/__init__.py @@ -35,7 +35,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # from .pycaffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, RMSPropSolver, AdaDeltaSolver, AdamSolver -from ._caffe import set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list, set_random_seed +from ._caffe import init_log, log, set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list, set_random_seed from ._caffe import __version__ from .proto.caffe_pb2 import TRAIN, TEST from .classifier import Classifier diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index b7d509ee5..b9dc23e24 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -88,6 +88,23 @@ const int NPY_DTYPE = NPY_FLOAT32; void set_mode_cpu() { Caffe::set_mode(Caffe::CPU); } void set_mode_gpu() { Caffe::set_mode(Caffe::GPU); } +void InitLog() { + ::google::InitGoogleLogging(""); + ::google::InstallFailureSignalHandler(); +} +void InitLogLevel(int level) { + FLAGS_minloglevel = level; + InitLog(); +} +void InitLogLevelPipe(int level, bool stderr) { + FLAGS_minloglevel = level; + FLAGS_logtostderr = stderr; + InitLog(); +} +void Log(const string& s) { + LOG(INFO) << s; +} + void set_random_seed(unsigned int seed) { Caffe::set_random_seed(seed); } // For convenience, check that input files can be opened, and raise an @@ -327,6 +344,10 @@ BOOST_PYTHON_MODULE(_caffe) { bp::scope().attr("__version__") = AS_STRING(CAFFE_VERSION); // Caffe utility functions + bp::def("init_log", &InitLog); + bp::def("init_log", &InitLogLevel); + bp::def("init_log", &InitLogLevelPipe); + bp::def("log", &Log); bp::def("set_mode_cpu", &set_mode_cpu); bp::def("set_mode_gpu", &set_mode_gpu); bp::def("set_random_seed", &set_random_seed); diff --git a/python/caffe/draw.py b/python/caffe/draw.py index 46ef510a0..1f3ab6f7d 100755 --- a/python/caffe/draw.py +++ b/python/caffe/draw.py @@ -127,11 +127,11 @@ def get_layer_label(layer, rankdir): separator, layer.type, separator, - layer.convolution_param.kernel_size[0] if len(layer.convolution_param.kernel_size._values) else 1, + layer.convolution_param.kernel_size[0] if len(layer.convolution_param.kernel_size) else 1, separator, - layer.convolution_param.stride[0] if len(layer.convolution_param.stride._values) else 1, + layer.convolution_param.stride[0] if len(layer.convolution_param.stride) else 1, separator, - layer.convolution_param.pad[0] if len(layer.convolution_param.pad._values) else 0) + layer.convolution_param.pad[0] if len(layer.convolution_param.pad) else 0) elif layer.type == 'Pooling': pooling_types_dict = get_pooling_types_dict() node_label = '"%s%s(%s %s)%skernel size: %d%sstride: %d%spad: %d"' %\ diff --git a/python/caffe/io.py b/python/caffe/io.py index 72a2fc682..0df78e7f3 100755 --- a/python/caffe/io.py +++ b/python/caffe/io.py @@ -117,7 +117,7 @@ def array_to_datum(arr, label=None): if arr.dtype == np.uint8: datum.data = arr.tostring() else: - datum.float_data.extend(arr.flat) + datum.float_data.extend(arr.astype(float).flat) if label is not None: datum.label = label return datum @@ -303,7 +303,7 @@ def set_mean(self, in_, mean): m_min, m_max = mean.min(), mean.max() normal_mean = (mean - m_min) / (m_max - m_min) mean = 
resize_image(normal_mean.transpose((1,2,0)),in_shape[1:]).transpose((2,0,1)) * (m_max - m_min) + m_min - #aise ValueError('Mean shape incompatible with input shape.') + #raise ValueError('Mean shape incompatible with input shape.') self.mean[in_] = mean def set_input_scale(self, in_, scale): diff --git a/python/caffe/net_spec.py b/python/caffe/net_spec.py index b8d568dcb..10ee4d4f1 100755 --- a/python/caffe/net_spec.py +++ b/python/caffe/net_spec.py @@ -142,6 +142,10 @@ class Function(object): def __init__(self, type_name, inputs, params): self.type_name = type_name + for index, input in enumerate(inputs): + if not isinstance(input, Top): + raise TypeError('%s input %d is not a Top (type is %s)' % + (type_name, index, type(input))) self.inputs = inputs self.params = params self.ntop = self.params.get('ntop', 1) diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py index d105c3f27..bc606148d 100755 --- a/python/caffe/pycaffe.py +++ b/python/caffe/pycaffe.py @@ -79,6 +79,16 @@ def _Net_blob_loss_weights(self): self._blob_loss_weights)) return self._blob_loss_weights_dict +@property +def _Net_layer_dict(self): + """ + An OrderedDict (bottom to top, i.e., input to output) of network + layers indexed by name + """ + if not hasattr(self, '_layer_dict'): + self._layer_dict = OrderedDict(zip(self._layer_names, self.layers)) + return self._layer_dict + @property def _Net_params(self): @@ -139,7 +149,7 @@ def _Net_forward(self, blobs=None, start=None, end=None, **kwargs): if end is not None: end_ind = list(self._layer_names).index(end) - outputs = set([end] + blobs) + outputs = set(self.top_names[end] + blobs) else: end_ind = len(self.layers) - 1 outputs = set(self.outputs + blobs) @@ -187,7 +197,7 @@ def _Net_backward(self, diffs=None, start=None, end=None, **kwargs): if end is not None: end_ind = list(self._layer_names).index(end) - outputs = set([end] + diffs) + outputs = set(self.bottom_names[end] + diffs) else: end_ind = 0 outputs = set(self.inputs + diffs) @@ -357,6 +367,7 @@ def get_id_name(self): # Attach methods to Net. 
Net.blobs = _Net_blobs Net.blob_loss_weights = _Net_blob_loss_weights +Net.layer_dict = _Net_layer_dict Net.params = _Net_params Net.forward = _Net_forward Net.backward = _Net_backward diff --git a/python/caffe/test/test_draw.py b/python/caffe/test/test_draw.py new file mode 100644 index 000000000..835bb5df0 --- /dev/null +++ b/python/caffe/test/test_draw.py @@ -0,0 +1,37 @@ +import os +import unittest + +from google.protobuf import text_format + +import caffe.draw +from caffe.proto import caffe_pb2 + +def getFilenames(): + """Yields files in the source tree which are Net prototxts.""" + result = [] + + root_dir = os.path.abspath(os.path.join( + os.path.dirname(__file__), '..', '..', '..')) + assert os.path.exists(root_dir) + + for dirname in ('models', 'examples'): + dirname = os.path.join(root_dir, dirname) + assert os.path.exists(dirname) + for cwd, _, filenames in os.walk(dirname): + for filename in filenames: + filename = os.path.join(cwd, filename) + if filename.endswith('.prototxt') and 'solver' not in filename: + yield os.path.join(dirname, filename) + + +class TestDraw(unittest.TestCase): + def test_draw_net(self): + for filename in getFilenames(): + net = caffe_pb2.NetParameter() + with open(filename) as infile: + text_format.Merge(infile.read(), net) + caffe.draw.draw_net(net, 'LR') + + +if __name__ == "__main__": + unittest.main() diff --git a/python/caffe/test/test_net.py b/python/caffe/test/test_net.py index 85845e6c6..04198f06d 100755 --- a/python/caffe/test/test_net.py +++ b/python/caffe/test/test_net.py @@ -61,11 +61,11 @@ def simple_net_file(num_output): bias_filler { type: 'constant' value: 2 } } param { decay_mult: 1 } param { decay_mult: 0 } } - layer { type: 'InnerProduct' name: 'ip' bottom: 'conv' top: 'ip' + layer { type: 'InnerProduct' name: 'ip' bottom: 'conv' top: 'ip_blob' inner_product_param { num_output: """ + str(num_output) + """ weight_filler { type: 'gaussian' std: 2.5 } bias_filler { type: 'constant' value: -3 } } } - layer { type: 'SoftmaxWithLoss' name: 'loss' bottom: 'ip' bottom: 'label' + layer { type: 'SoftmaxWithLoss' name: 'loss' bottom: 'ip_blob' bottom: 'label' top: 'loss' }""") f.close() return f.name @@ -111,10 +111,35 @@ def test_memory(self): for bl in blobs: total += bl.data.sum() + bl.diff.sum() + def test_layer_dict(self): + layer_dict = self.net.layer_dict + self.assertEqual(list(layer_dict.keys()), list(self.net._layer_names)) + for i, name in enumerate(self.net._layer_names): + self.assertEqual(layer_dict[name].type, + self.net.layers[i].type) + def test_forward_backward(self): self.net.forward() self.net.backward() + def test_forward_start_end(self): + conv_blob=self.net.blobs['conv']; + ip_blob=self.net.blobs['ip_blob']; + sample_data=np.random.uniform(size=conv_blob.data.shape); + sample_data=sample_data.astype(np.float32); + conv_blob.data[:]=sample_data; + forward_blob=self.net.forward(start='ip',end='ip'); + self.assertIn('ip_blob',forward_blob); + + manual_forward=[]; + for i in range(0,conv_blob.data.shape[0]): + dot=np.dot(self.net.params['ip'][0].data, + conv_blob.data[i].reshape(-1)); + manual_forward.append(dot+self.net.params['ip'][1].data); + manual_forward=np.array(manual_forward); + + np.testing.assert_allclose(ip_blob.data,manual_forward,rtol=1e-3); + def test_clear_param_diffs(self): # Run a forward/backward step to have non-zero diffs self.net.forward() @@ -134,13 +159,13 @@ def test_top_bottom_names(self): self.assertEqual(self.net.top_names, OrderedDict([('data', ['data', 'label']), ('conv', ['conv']), - ('ip', 
['ip']), + ('ip', ['ip_blob']), ('loss', ['loss'])])) self.assertEqual(self.net.bottom_names, OrderedDict([('data', []), ('conv', ['data']), ('ip', ['conv']), - ('loss', ['ip', 'label'])])) + ('loss', ['ip_blob', 'label'])])) def test_save_and_read(self): f = tempfile.NamedTemporaryFile(mode='w+', delete=False) @@ -224,12 +249,12 @@ class TestLevels(unittest.TestCase): """ def setUp(self): - self.f = tempfile.NamedTemporaryFile(mode='w+') + self.f = tempfile.NamedTemporaryFile(mode='w+', delete=False) self.f.write(self.TEST_NET) - self.f.flush() + self.f.close() def tearDown(self): - self.f.close() + os.remove(self.f.name) def check_net(self, net, blobs): net_blobs = [b for b in net.blobs.keys() if 'data' not in b] @@ -289,12 +314,12 @@ class TestStages(unittest.TestCase): """ def setUp(self): - self.f = tempfile.NamedTemporaryFile(mode='w+') + self.f = tempfile.NamedTemporaryFile(mode='w+', delete=False) self.f.write(self.TEST_NET) - self.f.flush() + self.f.close() def tearDown(self): - self.f.close() + os.remove(self.f.name) def check_net(self, net, blobs): net_blobs = [b for b in net.blobs.keys() if 'data' not in b] @@ -371,12 +396,12 @@ class TestAllInOne(unittest.TestCase): """ def setUp(self): - self.f = tempfile.NamedTemporaryFile(mode='w+') + self.f = tempfile.NamedTemporaryFile(mode='w+', delete=False) self.f.write(self.TEST_NET) - self.f.flush() + self.f.close() def tearDown(self): - self.f.close() + os.remove(self.f.name) def check_net(self, net, outputs): self.assertEqual(list(net.blobs['data'].shape), [1,1,10,10]) diff --git a/python/caffe/test/test_net_spec.py b/python/caffe/test/test_net_spec.py index d1b1f0af9..36520c2a5 100755 --- a/python/caffe/test/test_net_spec.py +++ b/python/caffe/test/test_net_spec.py @@ -115,3 +115,11 @@ def test_zero_tops(self): net_proto = silent_net() net = self.load_net(net_proto) self.assertEqual(len(net.forward()), 0) + + def test_type_error(self): + """Test that a TypeError is raised when a Function input isn't a Top.""" + data = L.DummyData(ntop=2) # data is a 2-tuple of Tops + r = r"^Silence input 0 is not a Top \(type is <(type|class) 'tuple'>\)$" + with self.assertRaisesRegexp(TypeError, r): + L.Silence(data, ntop=0) # should raise: data is a tuple, not a Top + L.Silence(*data, ntop=0) # shouldn't raise: each elt of data is a Top From 5b452432e58566c3f9d921c762a8b02a191d332f Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Thu, 6 Jul 2017 14:47:54 +0800 Subject: [PATCH 17/54] Fix the wrong passed primitive and memory in the MKLDNN concat layer. --- src/caffe/layers/mkldnn_concat_layer.cpp | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/caffe/layers/mkldnn_concat_layer.cpp b/src/caffe/layers/mkldnn_concat_layer.cpp index ee2cc5026..a0a1cd487 100644 --- a/src/caffe/layers/mkldnn_concat_layer.cpp +++ b/src/caffe/layers/mkldnn_concat_layer.cpp @@ -101,7 +101,7 @@ void MKLDNNConcatLayer::InitConcatFwd(const vector*>& bottom, LOG(INFO) << "size of bottom blob: " << bottom[0]->shape().size(); LOG(INFO) << "size of top blob: " << top[0]->shape().size(); LOG(INFO) << "MKLDNN concat layer only support 4D blob as input! 
Reshape the 2D input blob into 4D for calculation!"; -#endif +#endif vector bottom_4D_shape; int bottom_4D_height = 1; int bottom_4D_width = 1; @@ -168,9 +168,13 @@ void MKLDNNConcatLayer::InitConcatFwd(const vector*>& bottom, concatFwd.reset(new concat(*concatFwd_pd, fwd_input_primitives_at_, *fwd_output_memory)); for (auto i = 0; i < num_concats_; i++) { - fwd_bottom_data[i]->set_mkldnn_primitive(concatFwd); + //fwd_bottom_data[i]->set_mkldnn_primitive(concatFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive fwd_bottom_data_primitive_transfer(fwd_input_primitives_[i]); + fwd_bottom_data[i]->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); } - fwd_top_data->set_mkldnn_primitive(concatFwd); + //fwd_top_data->set_mkldnn_primitive(concatFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive fwd_top_data_memory_transfer(fwd_output_memory); + fwd_top_data->set_mkldnn_primitive(fwd_top_data_memory_transfer); } template @@ -237,11 +241,14 @@ void MKLDNNConcatLayer::InitConcatBwd(const vector*>& top, offsets[concat_dimension] += dims[concat_dimension]; - bwd_bottom_diff[i]->set_mkldnn_primitive(reorders[i]); + //bwd_bottom_diff[i]->set_mkldnn_primitive(reorders[i]); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwd_bottom_diff_memory_transfer(bwd_reorder_output_memory[i]); + bwd_bottom_diff[i]->set_mkldnn_primitive(bwd_bottom_diff_memory_transfer); } - bwd_top_diff->set_mkldnn_primitive(reorders[0]); - + //bwd_top_diff->set_mkldnn_primitive(reorders[0]); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwd_top_diff_memory_transfer(bwd_reorder_input_memory); + bwd_top_diff->set_mkldnn_primitive(bwd_top_diff_memory_transfer); } template From cdf4eca9a457a76ad71deca7be77d62e5f4214e2 Mon Sep 17 00:00:00 2001 From: "Jin, Ge" Date: Thu, 6 Jul 2017 12:59:46 -0400 Subject: [PATCH 18/54] Fix bug in RemoveBnScale Signed-off-by: Jin, Ge --- src/caffe/net.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index ecb1a1779..16cbec8d4 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -1280,8 +1280,8 @@ void Net::ShareTrainedLayersWith(const Net* other) { } //temp_net_param.mutable_compile_net_state()->set_bn_top_rename(other->bn_top_rename_); complete_net_param.CopyFrom(temp_net_param); - complete_net_param.clear_layer(); if (other->bn_scale_merge_) { + complete_net_param.clear_layer(); RecoverBNScaleMergedNet(&temp_net_param, &complete_net_param); } CopyTrainedLayersFrom(complete_net_param); From 980b57342cafc48e36cff8861419a0f7d6b97023 Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Fri, 7 Jul 2017 22:22:13 +0800 Subject: [PATCH 19/54] Fix the wrong passed primitive and memory in the MKLDNN lrn layer --- src/caffe/layers/mkldnn_lrn_layer.cpp | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/caffe/layers/mkldnn_lrn_layer.cpp b/src/caffe/layers/mkldnn_lrn_layer.cpp index c5eb48d1c..6c589c73e 100644 --- a/src/caffe/layers/mkldnn_lrn_layer.cpp +++ b/src/caffe/layers/mkldnn_lrn_layer.cpp @@ -198,8 +198,13 @@ void MKLDNNLRNLayer::InitLRNFwd(const vector*>& bottom, const } else { lrnFwd.reset(new lrn_forward(*lrnFwd_pd, *fwd_bottom_data_primitive, *fwd_top_data_memory)); } - fwd_bottom_data->set_mkldnn_primitive(lrnFwd); - fwd_top_data->set_mkldnn_primitive(lrnFwd); + //fwd_bottom_data->set_mkldnn_primitive(lrnFwd); //Wrong passed primitive! (TODO: Checking!) 
+ MKLDNNPrimitive fwd_bottom_data_primitive_transfer(fwd_bottom_data_primitive); + fwd_bottom_data->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); + + //fwd_top_data->set_mkldnn_primitive(lrnFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive fwd_top_data_memory_transfer(fwd_top_data_memory); + fwd_top_data->set_mkldnn_primitive(fwd_top_data_memory_transfer); } @@ -340,8 +345,13 @@ void MKLDNNLRNLayer::InitLRNBwd(const vector*>& top bwd_top_diff_primitive = bwd_top_diff->create_input(false); lrnBwd.reset(new lrn_backward(*lrnBwd_pd, *fwd_bottom_data_primitive, *bwd_top_diff_primitive, *scratch_memory, *bwd_bottom_diff_memory)); - bwd_bottom_diff->set_mkldnn_primitive(lrnBwd); - bwd_top_diff->set_mkldnn_primitive(lrnBwd); + //bwd_bottom_diff->set_mkldnn_primitive(lrnBwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwd_bottom_diff_memory_transfer(bwd_bottom_diff_memory); + bwd_bottom_diff->set_mkldnn_primitive(bwd_bottom_diff_memory_transfer); + + //bwd_top_diff->set_mkldnn_primitive(lrnBwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwd_top_diff_primitive_transfer(bwd_top_diff_primitive); + bwd_top_diff->set_mkldnn_primitive(bwd_top_diff_primitive_transfer); } From 16097d97cefa3b03ae01c42b2c0378932323b4bd Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Mon, 10 Jul 2017 13:04:24 +0800 Subject: [PATCH 20/54] Fix the wrong passed primitive and memory in the MKLDNN split layer --- src/caffe/layers/mkldnn_split_layer.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/caffe/layers/mkldnn_split_layer.cpp b/src/caffe/layers/mkldnn_split_layer.cpp index 5e6cf9bab..ab2c5156a 100644 --- a/src/caffe/layers/mkldnn_split_layer.cpp +++ b/src/caffe/layers/mkldnn_split_layer.cpp @@ -163,10 +163,14 @@ void MKLDNNSplitLayer::InitSplitBwd(const vector*>& bottom, // there may be reorders to be done for inputs(tops' diffs) // so it match SplitBwd primitive inputs format expectations for(int i = 0; i < top.size(); ++i) { - bwd_top_diffs_[i]->set_mkldnn_primitive(splitBwd_); + //bwd_top_diffs_[i]->set_mkldnn_primitive(splitBwd_); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwd_top_diff_primitive_transfer(bwd_top_diff_primitives_[i]); + bwd_top_diffs_[i]->set_mkldnn_primitive(bwd_top_diff_primitive_transfer); } - bwd_bottom_diff_->set_mkldnn_primitive(splitBwd_); + //bwd_bottom_diff_->set_mkldnn_primitive(splitBwd_); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwd_bottom_diff_memory_transfer(bwd_bottom_diff_memory_); + bwd_bottom_diff_->set_mkldnn_primitive(bwd_bottom_diff_memory_transfer); } From 13ff1b4cfa1d04834dbcc72e331ca9b521086573 Mon Sep 17 00:00:00 2001 From: "Jin, Ge" Date: Mon, 10 Jul 2017 12:49:04 -0400 Subject: [PATCH 21/54] Split RemoveBNScale out of net.cpp Signed-off-by: Jin, Ge --- include/caffe/net.hpp | 1 - include/caffe/util/remove_batch_norm.hpp | 2 + src/caffe/net.cpp | 103 +---------------------- src/caffe/util/remove_batch_norm.cpp | 103 +++++++++++++++++++++++ 4 files changed, 106 insertions(+), 103 deletions(-) diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp index 0dc63436c..ba47be986 100644 --- a/include/caffe/net.hpp +++ b/include/caffe/net.hpp @@ -304,7 +304,6 @@ class Net { * @brief If find "Conv--BN--Scale" in current network, merge BN and Scale layer into Convolution * layers, this optimization only works in caffe TEST phase now. 
*/ - static void RemoveBNScale(const NetParameter& param, NetParameter* param_compiled); static void GetBlobConsumers(std::vector &cnsmer_blobs, const string& blob_name_to_find, diff --git a/include/caffe/util/remove_batch_norm.hpp b/include/caffe/util/remove_batch_norm.hpp index c2e92f40f..316a4c022 100644 --- a/include/caffe/util/remove_batch_norm.hpp +++ b/include/caffe/util/remove_batch_norm.hpp @@ -69,5 +69,7 @@ void AdjustConvLayer(LayerParameter &conv_layer, template void RecoverBNScaleMergedNet(NetParameter * net_param, NetParameter* recovered_net_param); +template +void RemoveBNScale(const NetParameter& param, NetParameter* param_compiled); } #endif diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 16cbec8d4..0b0ca8bb1 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -493,7 +493,7 @@ void Net::CompileNet(const NetParameter& param, NetParameter param_temp0; param_temp0.CopyFrom(param); param_temp0.clear_layer(); - RemoveBNScale(param, ¶m_temp0); + RemoveBNScale(param, ¶m_temp0); NetParameter param_temp; // temporary compiled param param_temp.CopyFrom(param_temp0); @@ -763,107 +763,6 @@ void Net::CompilationRuleThree(const NetParameter& param, return; } - -template -void Net::RemoveBNScale(const NetParameter& param, - NetParameter* param_compiled) { - // - In TEST Phase, if we detect sequential layers conv->batch norm ->scale, - // We will merge batch norm and scale layer into conv layer. - if(param.state().phase() != TEST) { - param_compiled->CopyFrom(param); - param_compiled->mutable_compile_net_state()->set_bn_scale_remove(false); - return ; - } - - bool bn_scale_remove = false; - bool is_net_init = param.compile_net_state().is_init(); - std::set layers_to_drop; - for (int i = 0; i < param.layer_size(); ++i) { - LayerParameter *layer_param = (const_cast(param)).mutable_layer(i); - bool layer_included = true; - bool bn_use_global_stats_set = true; - if (layer_param->type().compare("Convolution") == 0) { - std::vector child_layers_params; - GetBlobConsumers(child_layers_params, layer_param->top(0), param, i + 1 < param.layer_size() ? i + 1 : i); - const LayerParameter &child_layer_param = child_layers_params.size() > 0 ? *(child_layers_params[0]) : *layer_param; - // check whether child layer is BatchNorm - if (child_layer_param.type().compare("BatchNorm") == 0) { - BatchNormParameter bn_param = child_layer_param.batch_norm_param(); - if (is_net_init) { - //Testing Network init process - bool bn_use_global_stats = true; - if (bn_param.has_use_global_stats()) { - bn_use_global_stats = bn_param.use_global_stats(); - } - if (!bn_use_global_stats) { - //This bn layer's use_global_stats is set manually! Don't remove it. - //remained_bn_layer_names.push_back(child_layer_param.name()); - param_compiled->mutable_compile_net_state()->add_kept_bn_layers(child_layer_param.name()); - bn_use_global_stats_set = false; - } - } else { - int kept_bn_layers_num = param.compile_net_state().kept_bn_layers_size(); - bool in_kept_list = false; - for (int idx = 0; idx < kept_bn_layers_num; ++idx) { - if (child_layer_param.name().compare(param.compile_net_state().kept_bn_layers(idx)) == 0) { - in_kept_list = true; - break; - } - } - if (in_kept_list) { - bn_use_global_stats_set = false; - } - } - - if (!bn_use_global_stats_set) { - //Even in caffe TEST phase, current batch norm layer has set use_global_stats = false in protxt file, so we won't - //merge this layer into convolution layer. 
- param_compiled->add_layer()->CopyFrom(*layer_param); - continue; - } - std::vector grandchild_layers_params; - GetBlobConsumers(grandchild_layers_params, child_layer_param.top(0), param, i + 2 < param.layer_size() ? i + 2 : i); - const LayerParameter &grandchild_layer_param = (grandchild_layers_params.size() > 0) ? *(grandchild_layers_params[0]) : child_layer_param; - if (grandchild_layer_param.type().compare("Scale") == 0) { - MergeLayer(*layer_param, grandchild_layer_param); - AdjustConvLayer(*layer_param, child_layer_param, grandchild_layer_param, is_net_init); - if (bn_scale_remove == false) bn_scale_remove = true; - layers_to_drop.insert(child_layer_param.name()); - layers_to_drop.insert(grandchild_layer_param.name()); - } else if (&child_layer_param != &grandchild_layer_param) { - //In fact, conv-->batchnorm can also be optimized. In such case, we check the blob size of batch norm layer - //if is 3, it means current net hasn't used scale layer, this is equivalent to scale layer with all 1 weights and 0 bias - //if is 4 or 5, it means intel caffe compilation rule 1 works here, we can recover the scale layer from batch norm layer - MergeLayer(*layer_param, child_layer_param); - if (!is_net_init) { - shared_ptr scale_layer_param(new LayerParameter()); - RecoverScaleFromBN(child_layer_param, *scale_layer_param, (Dtype)1, (Dtype)0); - AdjustConvLayer(*layer_param, child_layer_param, *scale_layer_param, is_net_init); - } else { - AdjustConvLayer(*layer_param, child_layer_param, grandchild_layer_param, true); - } - if (bn_scale_remove == false) bn_scale_remove = true; - layers_to_drop.insert(child_layer_param.name()); - } - } - } - if (layers_to_drop.find(layer_param->name()) != layers_to_drop.end()) { - LOG_IF(INFO, Caffe::root_solver()) << "Dropped Layer: "<< layer_param->name() << std::endl; - layer_included = false; - // Remove dropped layer from the list of layers to be dropped - layers_to_drop.erase(layers_to_drop.find(layer_param->name())); - } - if (layer_included) { - if (layer_param->type().compare("BatchNorm") == 0) { - param_compiled->mutable_compile_net_state()->add_kept_bn_layers(layer_param->name()); - } - param_compiled->add_layer()->CopyFrom(*layer_param); - } - } - - param_compiled->mutable_compile_net_state()->set_bn_scale_remove(bn_scale_remove); - } - template void Net::GetBlobConsumers( std::vector& consumer_blobs, diff --git a/src/caffe/util/remove_batch_norm.cpp b/src/caffe/util/remove_batch_norm.cpp index 63c9b3f81..8c56639fc 100644 --- a/src/caffe/util/remove_batch_norm.cpp +++ b/src/caffe/util/remove_batch_norm.cpp @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "caffe/blob.hpp" #include "caffe/util/remove_batch_norm.hpp" #include "caffe/util/math_functions.hpp" +#include "caffe/net.hpp" namespace caffe { template @@ -188,6 +189,106 @@ void RecoverBNScaleMergedNet(NetParameter * net_param, NetParameter* recovered_n } } +template +void RemoveBNScale(const NetParameter& param, NetParameter* param_compiled) { + + // - In TEST Phase, if we detect sequential layers conv->batch norm ->scale, + // We will merge batch norm and scale layer into conv layer. 
+ if(param.state().phase() != TEST) { + param_compiled->CopyFrom(param); + param_compiled->mutable_compile_net_state()->set_bn_scale_remove(false); + return ; + } + + bool bn_scale_remove = false; + bool is_net_init = param.compile_net_state().is_init(); + std::set layers_to_drop; + for (int i = 0; i < param.layer_size(); ++i) { + LayerParameter *layer_param = (const_cast(param)).mutable_layer(i); + bool layer_included = true; + bool bn_use_global_stats_set = true; + if (layer_param->type().compare("Convolution") == 0) { + std::vector child_layers_params; + Net::GetBlobConsumers(child_layers_params, layer_param->top(0), param, i + 1 < param.layer_size() ? i + 1 : i); + const LayerParameter &child_layer_param = child_layers_params.size() > 0 ? *(child_layers_params[0]) : *layer_param; + // check whether child layer is BatchNorm + if (child_layer_param.type().compare("BatchNorm") == 0) { + BatchNormParameter bn_param = child_layer_param.batch_norm_param(); + if (is_net_init) { + //Testing Network init process + bool bn_use_global_stats = true; + if (bn_param.has_use_global_stats()) { + bn_use_global_stats = bn_param.use_global_stats(); + } + if (!bn_use_global_stats) { + //This bn layer's use_global_stats is set manually! Don't remove it. + //remained_bn_layer_names.push_back(child_layer_param.name()); + param_compiled->mutable_compile_net_state()->add_kept_bn_layers(child_layer_param.name()); + bn_use_global_stats_set = false; + } + } else { + int kept_bn_layers_num = param.compile_net_state().kept_bn_layers_size(); + bool in_kept_list = false; + for (int idx = 0; idx < kept_bn_layers_num; ++idx) { + if (child_layer_param.name().compare(param.compile_net_state().kept_bn_layers(idx)) == 0) { + in_kept_list = true; + break; + } + } + if (in_kept_list) { + bn_use_global_stats_set = false; + } + } + + if (!bn_use_global_stats_set) { + //Even in caffe TEST phase, current batch norm layer has set use_global_stats = false in protxt file, so we won't + //merge this layer into convolution layer. + param_compiled->add_layer()->CopyFrom(*layer_param); + continue; + } + std::vector grandchild_layers_params; + Net::GetBlobConsumers(grandchild_layers_params, child_layer_param.top(0), param, i + 2 < param.layer_size() ? i + 2 : i); + const LayerParameter &grandchild_layer_param = (grandchild_layers_params.size() > 0) ? *(grandchild_layers_params[0]) : child_layer_param; + if (grandchild_layer_param.type().compare("Scale") == 0) { + MergeLayer(*layer_param, grandchild_layer_param); + AdjustConvLayer(*layer_param, child_layer_param, grandchild_layer_param, is_net_init); + if (bn_scale_remove == false) bn_scale_remove = true; + layers_to_drop.insert(child_layer_param.name()); + layers_to_drop.insert(grandchild_layer_param.name()); + } else if (&child_layer_param != &grandchild_layer_param) { + //In fact, conv-->batchnorm can also be optimized. 
In such case, we check the blob size of batch norm layer + //if is 3, it means current net hasn't used scale layer, this is equivalent to scale layer with all 1 weights and 0 bias + //if is 4 or 5, it means intel caffe compilation rule 1 works here, we can recover the scale layer from batch norm layer + MergeLayer(*layer_param, child_layer_param); + if (!is_net_init) { + shared_ptr scale_layer_param(new LayerParameter()); + RecoverScaleFromBN(child_layer_param, *scale_layer_param, (Dtype)1, (Dtype)0); + AdjustConvLayer(*layer_param, child_layer_param, *scale_layer_param, is_net_init); + } else { + AdjustConvLayer(*layer_param, child_layer_param, grandchild_layer_param, true); + } + if (bn_scale_remove == false) bn_scale_remove = true; + layers_to_drop.insert(child_layer_param.name()); + } + } + } + if (layers_to_drop.find(layer_param->name()) != layers_to_drop.end()) { + LOG_IF(INFO, Caffe::root_solver()) << "Dropped Layer: "<< layer_param->name() << std::endl; + layer_included = false; + // Remove dropped layer from the list of layers to be dropped + layers_to_drop.erase(layers_to_drop.find(layer_param->name())); + } + if (layer_included) { + if (layer_param->type().compare("BatchNorm") == 0) { + param_compiled->mutable_compile_net_state()->add_kept_bn_layers(layer_param->name()); + } + param_compiled->add_layer()->CopyFrom(*layer_param); + } + } + + param_compiled->mutable_compile_net_state()->set_bn_scale_remove(bn_scale_remove); +} + template void RecoverScaleFromBN(const LayerParameter& bn_layer_param, LayerParameter& scale_layer_param, float default_scale_weights, float default_scale_bias); template void RecoverScaleFromBN(const LayerParameter& bn_layer_param, LayerParameter& scale_layer_param, double default_scale_weights, double default_scale_bias); template void AdjustConvLayer(LayerParameter &conv_layer, @@ -200,4 +301,6 @@ template void AdjustConvLayer(LayerParameter &conv_layer, template void RecoverBNScaleMergedNet(NetParameter * net_param, NetParameter* recovered_net_param); template void RecoverBNScaleMergedNet(NetParameter * net_param, NetParameter* recovered_net_param); +template void RemoveBNScale(const NetParameter& param, NetParameter* param_compiled); +template void RemoveBNScale(const NetParameter& param, NetParameter* param_compiled); } From ceb8c0dcb76aa4d985e3d4a11c0746569684c587 Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Mon, 10 Jul 2017 16:28:22 +0800 Subject: [PATCH 22/54] Fix the regression of MKLDNN alexnet single node training cannot converge. --- src/caffe/layers/mkldnn_relu_layer.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/caffe/layers/mkldnn_relu_layer.cpp b/src/caffe/layers/mkldnn_relu_layer.cpp index c1b63a053..8bd5d3ab4 100644 --- a/src/caffe/layers/mkldnn_relu_layer.cpp +++ b/src/caffe/layers/mkldnn_relu_layer.cpp @@ -273,13 +273,14 @@ void MKLDNNReLULayer::InitReLUBwd(const vector*>& top bwd_bottom_diff_memory = bwd_bottom_diff->create_output_memory(inplace); reluBwd.reset(new relu_backward(*reluBwd_pd, *fwd_bottom_data_primitive, *bwd_top_diff_primitive, *bwd_bottom_diff_memory)); - //bwd_top_diff->set_mkldnn_primitive(reluBwd); //Wrong passed primitive! (TODO: Checking!) - MKLDNNPrimitive bwd_top_diff_primitive_transfer(bwd_top_diff_primitive); - bwd_top_diff->set_mkldnn_primitive(bwd_top_diff_primitive_transfer); - - //bwd_bottom_diff->set_mkldnn_primitive(reluBwd); //Wrong passed primitive! (TODO: Checking!) 
- MKLDNNPrimitive bwd_bottom_diff_memory_transfer(bwd_bottom_diff_memory); - bwd_bottom_diff->set_mkldnn_primitive(bwd_bottom_diff_memory_transfer); + //TODO: the transfer fix will lead AlexNet not converge. The root cause is the "inplace". + bwd_top_diff->set_mkldnn_primitive(reluBwd); //Wrong passed primitive! (TODO: Checking!) + //MKLDNNPrimitive bwd_top_diff_primitive_transfer(bwd_top_diff_primitive); + //bwd_top_diff->set_mkldnn_primitive(bwd_top_diff_primitive_transfer); + + bwd_bottom_diff->set_mkldnn_primitive(reluBwd); //Wrong passed primitive! (TODO: Checking!) + //MKLDNNPrimitive bwd_bottom_diff_memory_transfer(bwd_bottom_diff_memory); + //bwd_bottom_diff->set_mkldnn_primitive(bwd_bottom_diff_memory_transfer); } template From cd00c4b7a4afa1d40f1543ebb15bf1ef67389798 Mon Sep 17 00:00:00 2001 From: "Gong, Jiong" Date: Mon, 10 Jul 2017 19:29:36 +0800 Subject: [PATCH 23/54] Merge shuffled data split fix from Jin Ge. --- src/caffe/data_reader.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/caffe/data_reader.cpp b/src/caffe/data_reader.cpp index 01ebe50c8..fe70e3837 100644 --- a/src/caffe/data_reader.cpp +++ b/src/caffe/data_reader.cpp @@ -44,7 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "caffe/data_reader.hpp" #include "caffe/layers/data_layer.hpp" #include "caffe/proto/caffe.pb.h" - +#include "caffe/multinode/mlsl.hpp" namespace caffe { using boost::weak_ptr; @@ -147,18 +147,18 @@ void DataReader::Body::read_one(DBWrapper* dbw, QueuePair* qp) { CHECK(dbw); CHECK(qp); -#ifdef CAFFE_MLSL_SHUFFLE +#ifdef USE_MLSL string* data = qp->free_.pop(); static int mb=0; if(!mb) { /* move each node’s file position to its node ID – this part can be move to the initialization */ - for(int i=0;iNext(); } mb = 1; } *data = dbw->value(); qp->full_.push(data); - for(int i=0;iNext(); } #else @@ -191,8 +191,17 @@ DataReader::DBShuffle::DBShuffle(const LayerParameter& param):DBWrapper(param) { // randomly shuffle data LOG(INFO) << "Shuffling data"; +#ifdef USE_MLSL + mn::Distribution * distrib = mn::get_distrib(); + float fetch_seed; + fetch_seed = static_cast(caffe_rng_rand() % 15); + distrib->bcast(&fetch_seed, sizeof(fetch_seed)); + LOG(INFO) << "Random seed for shuffling: " << fetch_seed; + prefetch_rng_.reset(new Caffe::RNG(static_cast(fetch_seed))); +#else const unsigned int prefetch_rng_seed = caffe_rng_rand(); prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed)); +#endif ShuffleImages(); } From 0e66216e6301fde59d83dba12d43ba690b5074e9 Mon Sep 17 00:00:00 2001 From: "Gong, Jiong" Date: Mon, 10 Jul 2017 21:26:20 +0800 Subject: [PATCH 24/54] fix a potential stack access violation since constructor function does not accept arguments, therefore MPI init may get invalid argc and argv --- include/caffe/multinode/mlsl.hpp | 2 ++ src/caffe/multinode/mlsl.cpp | 28 ++++++++++++---------------- tools/caffe.cpp | 3 +++ 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/include/caffe/multinode/mlsl.hpp b/include/caffe/multinode/mlsl.hpp index b135e4673..b0d3d13d6 100644 --- a/include/caffe/multinode/mlsl.hpp +++ b/include/caffe/multinode/mlsl.hpp @@ -48,6 +48,8 @@ namespace caffe { #define MLSL_DEFAULT_COLOR -1 + void init(int* argc, char** argv[]); + inline void free(void *addr) { return MLSL::Environment::GetEnv().Free(addr); } diff --git a/src/caffe/multinode/mlsl.cpp b/src/caffe/multinode/mlsl.cpp index 1653c5692..31ffef7ee 100644 --- a/src/caffe/multinode/mlsl.cpp +++ b/src/caffe/multinode/mlsl.cpp @@ 
-41,26 +41,22 @@ #include "boost/thread/mutex.hpp" #include "caffe/multinode/mlsl.hpp" -namespace { - - __attribute__((constructor)) - void init(int argc, char **argv) { - static class initialize { - public: - initialize(int* argc, char** argv[]) { - MLSL::Environment::GetEnv().Init(argc, argv); - } - ~initialize() { - MLSL::Environment::GetEnv().Finalize(); - } - } __init{ &argc, &argv }; - } -} - namespace caffe { namespace mn { boost::mutex distrib_lock; std::map, boost::shared_ptr> distrib_map; + + void init(int* argc, char **argv[]) { + static class initialize { + public: + initialize(int* argc, char** argv[]) { + MLSL::Environment::GetEnv().Init(argc, argv); + } + ~initialize() { + MLSL::Environment::GetEnv().Finalize(); + } + } __init{ argc, argv }; + } shared_ptr create_distrib( int dataParts, int modelParts, int dataColor, int modelColor, diff --git a/tools/caffe.cpp b/tools/caffe.cpp index 3cd2234f6..231209127 100644 --- a/tools/caffe.cpp +++ b/tools/caffe.cpp @@ -692,6 +692,9 @@ int main(int argc, char** argv) { " compare collects layer data using inputs from other device"); // Run tool or show usage. caffe::GlobalInit(&argc, &argv); +#ifdef USE_MLSL + caffe::mn::init(&argc, &argv); +#endif if (argc == 2) { #ifdef WITH_PYTHON_LAYER try { From 3ed7e75e9d4688ebb4d616b357b689ffb91d1ecb Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Tue, 11 Jul 2017 13:43:20 +0800 Subject: [PATCH 25/54] Update the MKLDNN version to latest public commit. --- mkldnn.commit | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkldnn.commit b/mkldnn.commit index 4e6af52a3..e5352e307 100644 --- a/mkldnn.commit +++ b/mkldnn.commit @@ -1 +1 @@ -22bf25f29369d247098968837b21f3d1bdb2336e +264ad6619810c196971f8cd46a9cbcd480979a48 From 8d8e0caf90b9de348bbd3e3b531fa627221b2af8 Mon Sep 17 00:00:00 2001 From: "Gong, Jiong" Date: Thu, 13 Jul 2017 00:34:37 +0800 Subject: [PATCH 26/54] fix need reduce, should pass locally indexed param id --- include/caffe/multinode/multi_sync.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/caffe/multinode/multi_sync.hpp b/include/caffe/multinode/multi_sync.hpp index 08c8aed91..c46509149 100644 --- a/include/caffe/multinode/multi_sync.hpp +++ b/include/caffe/multinode/multi_sync.hpp @@ -203,7 +203,7 @@ namespace caffe { std::vector ¶m_ids = layer_param_ids[layer_id]; for (int i = 0; i < param_ids.size(); ++i) { - if (!layer->ParamNeedReduce(param_ids[i])) continue; + if (!layer->ParamNeedReduce(i)) continue; if (CAN_USE_PRV(net_params[param_ids[i]])) { layer->layerOp->GetParameterSet(i)->StartGradientComm((void *) net_params[param_ids[i]]->mutable_prv_diff()); } else { @@ -221,7 +221,7 @@ namespace caffe { std::vector ¶m_ids = layer_param_ids[layer_id]; for (int i=0; iParamNeedReduce(param_ids[i])) continue; + if (!layer->ParamNeedReduce(i)) continue; Dtype *delwt_buf{(Dtype *) layer->layerOp->GetParameterSet(i)->WaitGradientComm()}; if (delwt_buf) { if (CAN_USE_PRV(net_params[param_ids[i]])) { From ccef9d742b0d12fe38f3e4578175cdb219d43101 Mon Sep 17 00:00:00 2001 From: "Gong, Jiong" Date: Thu, 13 Jul 2017 03:09:07 +0800 Subject: [PATCH 27/54] use local variable to track the first read since static variable would impact all the data readers --- include/caffe/data_reader.hpp | 1 + src/caffe/data_reader.cpp | 7 +++---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/caffe/data_reader.hpp b/include/caffe/data_reader.hpp index c700586be..dff77199f 100644 --- a/include/caffe/data_reader.hpp +++ 
b/include/caffe/data_reader.hpp @@ -129,6 +129,7 @@ class DataReader { const LayerParameter param_; BlockingQueue > new_queue_pairs_; + bool first_read_; friend class DataReader; diff --git a/src/caffe/data_reader.cpp b/src/caffe/data_reader.cpp index fe70e3837..c78ff39c4 100644 --- a/src/caffe/data_reader.cpp +++ b/src/caffe/data_reader.cpp @@ -99,7 +99,7 @@ DataReader::QueuePair::~QueuePair() { DataReader::Body::Body(const LayerParameter& param) : param_(param), - new_queue_pairs_() { + new_queue_pairs_(), first_read_(true) { StartInternalThread(); } @@ -149,12 +149,11 @@ void DataReader::Body::read_one(DBWrapper* dbw, QueuePair* qp) { #ifdef USE_MLSL string* data = qp->free_.pop(); - static int mb=0; - if(!mb) { /* move each node’s file position to its node ID – this part can be move to the initialization */ + if(first_read_) { /* move each node’s file position to its node ID – this part can be move to the initialization */ for(int i=0;iNext(); } - mb = 1; + first_read_ = false; } *data = dbw->value(); qp->full_.push(data); From 8509d137f2a97752a51af657805cbbada2300953 Mon Sep 17 00:00:00 2001 From: "Jin, Ge" Date: Wed, 12 Jul 2017 16:25:00 -0400 Subject: [PATCH 28/54] Add focus test for remove bn feature Signed-off-by: Jin, Ge --- src/caffe/test/test_net.cpp | 224 ++++++++++++++++++++++++++++++++++++ 1 file changed, 224 insertions(+) diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp index ae4d3f03f..5b97a8bfb 100644 --- a/src/caffe/test/test_net.cpp +++ b/src/caffe/test/test_net.cpp @@ -2987,6 +2987,230 @@ class CompileNetTest : public ::testing::Test { } }; +TEST_F(CompileNetTest, TestRemoveBatchNorm1) { + const string& input_proto = + "name: 'TestNetwork' " + "layer { " + " name: 'data' " + " type: 'Data' " + " top: 'data' " + " top: 'label' " + "} " + "layer { " + " bottom: 'data' " + " name: 'conv' " + " top: 'conv' " + " type: 'Convolution' " + "} " + "layer { " + " bottom: 'conv' " + " name: 'bn' " + " top: 'conv' " + " type: 'BatchNorm' " + "} " + "layer { " + " name: 'loss' " + " type: 'SoftmaxWithLoss' " + " bottom: 'conv' " + " bottom: 'label' " + "} "; + + const string& output_proto = + "name: 'TestNetwork' " + "layer { " + " name: 'data' " + " type: 'Data' " + " top: 'data' " + " top: 'label' " + "} " + "layer { " + " bottom: 'data' " + " name: 'conv' " + " top: 'conv' " + " type: 'Convolution' " + "} " + "layer { " + " name: 'loss' " + " type: 'SoftmaxWithLoss' " + " bottom: 'conv' " + " bottom: 'label' " + "} "; + this->RunCompilerNetTest(input_proto, output_proto); +} + +TEST_F(CompileNetTest, TestRemoveBatchNorm2) { + const string& input_proto = + "name: 'TestNetwork' " + "layer { " + " name: 'data' " + " type: 'Data' " + " top: 'data' " + " top: 'label' " + "} " + "layer { " + " bottom: 'data' " + " name: 'fc1' " + " top: 'fc1' " + " type: 'InnerProduct' " + "} " + "layer { " + " bottom: 'fc1' " + " name: 'bn' " + " top: 'bn' " + " type: 'BatchNorm' " + "} " + "layer { " + " name: 'loss' " + " type: 'SoftmaxWithLoss' " + " bottom: 'bn' " + " bottom: 'label' " + "} "; + + const string& output_proto = + "name: 'TestNetwork' " + "layer { " + " name: 'data' " + " type: 'Data' " + " top: 'data' " + " top: 'label' " + "} " + "layer { " + " bottom: 'data' " + " name: 'fc1' " + " top: 'fc1' " + " type: 'InnerProduct' " + "} " + "layer { " + " bottom: 'fc1' " + " name: 'bn' " + " top: 'bn' " + " type: 'BatchNorm' " + "} " + "layer { " + " name: 'loss' " + " type: 'SoftmaxWithLoss' " + " bottom: 'bn' " + " bottom: 'label' " + "} "; + 
this->RunCompilerNetTest(input_proto, output_proto); +} + +TEST_F(CompileNetTest, TestRemoveBatchNorm3) { + const string& input_proto = + "name: 'TestNetwork' " + "layer { " + " name: 'data' " + " type: 'Data' " + " top: 'data' " + " top: 'label' " + "} " + "layer { " + " bottom: 'data' " + " name: 'conv' " + " top: 'conv' " + " type: 'Convolution' " + "} " + "layer { " + " bottom: 'conv' " + " name: 'bn' " + " top: 'conv' " + " type: 'BatchNorm' " + " batch_norm_param { " + " use_global_stats: false" + " }" + "} " + "layer { " + " name: 'loss' " + " type: 'SoftmaxWithLoss' " + " bottom: 'conv' " + " bottom: 'label' " + "} "; + + const string& output_proto = + "name: 'TestNetwork' " + "layer { " + " name: 'data' " + " type: 'Data' " + " top: 'data' " + " top: 'label' " + "} " + "layer { " + " bottom: 'data' " + " name: 'conv' " + " top: 'conv' " + " type: 'Convolution' " + "} " + "layer { " + " bottom: 'conv' " + " name: 'bn' " + " top: 'conv' " + " type: 'BatchNorm' " + " batch_norm_param { " + " use_global_stats: false" + " }" + "} " + "layer { " + " name: 'loss' " + " type: 'SoftmaxWithLoss' " + " bottom: 'conv' " + " bottom: 'label' " + "} "; + this->RunCompilerNetTest(input_proto, output_proto); +} + +TEST_F(CompileNetTest, TestRemoveBatchNorm4) { + const string& input_proto = + "name: 'TestNetwork' " + "layer { " + " name: 'data' " + " type: 'Data' " + " top: 'data' " + " top: 'label' " + "} " + "layer { " + " bottom: 'data' " + " name: 'conv' " + " top: 'conv' " + " type: 'Convolution' " + "} " + "layer { " + " bottom: 'conv' " + " name: 'bn' " + " top: 'conv' " + " type: 'BatchNorm' " + " batch_norm_param { " + " use_global_stats: true" + " }" + "} " + "layer { " + " name: 'loss' " + " type: 'SoftmaxWithLoss' " + " bottom: 'conv' " + " bottom: 'label' " + "} "; + + const string& output_proto = + "name: 'TestNetwork' " + "layer { " + " name: 'data' " + " type: 'Data' " + " top: 'data' " + " top: 'label' " + "} " + "layer { " + " bottom: 'data' " + " name: 'conv' " + " top: 'conv' " + " type: 'Convolution' " + "} " + "layer { " + " name: 'loss' " + " type: 'SoftmaxWithLoss' " + " bottom: 'conv' " + " bottom: 'label' " + "} "; + this->RunCompilerNetTest(input_proto, output_proto); +} #ifdef MKL2017_SUPPORTED // If BatchNorm of engine MKL2017 // produce blob consumed by From 90e6e1247d68c523898d230bd66ac17b4a176370 Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Thu, 13 Jul 2017 10:46:53 +0800 Subject: [PATCH 29/54] Fix pooling initialization using correct data type. Merge from the prv-inf branch. 
--- src/caffe/layers/mkldnn_pooling_layer.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/caffe/layers/mkldnn_pooling_layer.cpp b/src/caffe/layers/mkldnn_pooling_layer.cpp index 2ed89f98e..f5feb50b0 100644 --- a/src/caffe/layers/mkldnn_pooling_layer.cpp +++ b/src/caffe/layers/mkldnn_pooling_layer.cpp @@ -216,18 +216,20 @@ void MKLDNNPoolingLayer::InitPoolingFwd(const vector*>& botto // ---- Initialize memory descriptors ------------- typedef typename memory::primitive_desc MemPD; // short name for memory::primitive_desc - memory::format cmfmt = mfmt_nchw; + + shared_ptr usr_bottom_data_mpd(new MemPD({{bottom_tz}, mpcsn, mfmt_nchw}, cpu_engine)); + shared_ptr usr_top_data_mpd(new MemPD({{top_tz}, mpcsn, mfmt_nchw}, cpu_engine)); + if (bottom_data_is_prv) { shared_ptr > mem_descr = get_mkldnn_prv_descriptor(bottom[0]); cmfmt = static_cast(mem_descr->prv_memory_pd()->desc().data.format); + mpcsn = static_cast(mem_descr->prv_memory_pd()->desc().data.data_type); } shared_ptr init_fwd_bottom_md(new memory::desc({bottom_tz}, mpcsn, cmfmt)); shared_ptr init_fwd_top_md(new memory::desc({top_tz}, mpcsn, cmfmt)); - shared_ptr usr_bottom_data_mpd(new MemPD({{bottom_tz}, mpcsn, mfmt_nchw}, cpu_engine)); - shared_ptr usr_top_data_mpd(new MemPD({{top_tz}, mpcsn, mfmt_nchw}, cpu_engine)); // ---- Initialize pooling primitive descriptor ------------- pooling_forward::desc poolingFwd_desc(propagation, pooling_algorithm, *init_fwd_bottom_md,*init_fwd_top_md , {sh, sw}, {kh, kw}, {pt, pl}, {pb, pr}, padding_kind::zero); From 66fc5f3ea72faea309dbc543c2c7b025d0128d11 Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Thu, 13 Jul 2017 16:22:13 +0800 Subject: [PATCH 30/54] Fix the wrong passed primitive and memory in the MKLDNN element wise layer. --- src/caffe/layers/mkldnn_eltwise_layer.cpp | 28 +++++++++++++---------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/caffe/layers/mkldnn_eltwise_layer.cpp b/src/caffe/layers/mkldnn_eltwise_layer.cpp index 2a4a87c79..060467e82 100644 --- a/src/caffe/layers/mkldnn_eltwise_layer.cpp +++ b/src/caffe/layers/mkldnn_eltwise_layer.cpp @@ -201,9 +201,13 @@ void MKLDNNEltwiseLayer::InitEltwiseFwd(const vector*>& botto for (auto i = 0; i < num_bottoms_; i++) { - fwd_bottom_data[i]->set_mkldnn_primitive(eltwiseFwd); + //fwd_bottom_data[i]->set_mkldnn_primitive(eltwiseFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive fwd_bottom_data_primitive_transfer(fwd_bottom_data_primitives_[i]); + fwd_bottom_data[i]->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); } - fwd_top_data->set_mkldnn_primitive(eltwiseFwd); + //fwd_top_data->set_mkldnn_primitive(eltwiseFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive fwd_top_data_memory_transfer(fwd_top_data_memory); + fwd_top_data->set_mkldnn_primitive(fwd_top_data_memory_transfer); } @@ -214,9 +218,9 @@ void MKLDNNEltwiseLayer::Forward_cpu(const vector*>& bottom, if(eltwiseFwd_pd == NULL) InitEltwiseFwd(bottom, top); - for (auto i = 0; i < num_bottoms_; i++) - { - // making reorders if needed. + for (auto i = 0; i < num_bottoms_; i++) + { + // making reorders if needed. 
fwd_bottom_data[i]->sync_before_read(); } // update top that head at prv @@ -233,13 +237,13 @@ void MKLDNNEltwiseLayer::Backward_cpu(const vector*>& top , const vector& propagate_down , const vector*>& bottom) { - VLOG(1) << "MKLDNNEltwiseLayer::Backward_cpu: " << this->layer_param_.name(); - - for (int i = 0; i < num_bottoms_; ++i) - { - //Eltwise layer is not supporting multiplication coefficient in Backward due to lack of supporting scale and copy primitives in MKL-DNN - CHECK_EQ(coeffs_[i], Dtype(1)) << "Not supported yet"; - + VLOG(1) << "MKLDNNEltwiseLayer::Backward_cpu: " << this->layer_param_.name(); + + for (int i = 0; i < num_bottoms_; ++i) + { + //Eltwise layer is not supporting multiplication coefficient in Backward due to lack of supporting scale and copy primitives in MKL-DNN + CHECK_EQ(coeffs_[i], Dtype(1)) << "Not supported yet"; + bottom[i]->ShareDiff(*top[0]); } } From 8908173be0825cf5e588ed2c77568a84d6d8beaa Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Fri, 14 Jul 2017 14:12:57 +0800 Subject: [PATCH 31/54] Merge the fix that the extprv to prv converison can only skipped when both format and data type are the same. --- src/caffe/mkldnn_memory.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/caffe/mkldnn_memory.cpp b/src/caffe/mkldnn_memory.cpp index ddad67f57..bacb6ae61 100644 --- a/src/caffe/mkldnn_memory.cpp +++ b/src/caffe/mkldnn_memory.cpp @@ -212,10 +212,11 @@ void MKLDNNMemoryDescriptor::convert_from_extprv(shared_ptr_reorder_extprv2prv_pd == NULL) return; - if (this->_extprv_memory_pd->desc().data.format == this->_prv_memory_pd->desc().data.format) + if (this->_extprv_memory_pd->desc().data.format == this->_prv_memory_pd->desc().data.format && + this->_extprv_memory_pd->desc().data.data_type == this->_prv_memory_pd->desc().data.data_type) { #ifdef DEBUG - LOG(INFO) << "The format of _extprv_memory_pd and _prv_memory_pd is same, no need do conversion."; + LOG(INFO) << "The format and data_type of _extprv_memory_pd and _prv_memory_pd is same, no need do conversion."; #endif return; } From 60394be7fdfa042088e638ae946a3ca73fb524c2 Mon Sep 17 00:00:00 2001 From: xinanlin Date: Fri, 14 Jul 2017 14:22:00 +0800 Subject: [PATCH 32/54] add license for xbyak --- xbyak/COPYRIGHT | 47 ++++++++++++++++++++++++++++++++++++++++++ xbyak/xbyak.h | 45 ++++++++++++++++++++++++++++++++++++++++ xbyak/xbyak_bin2hex.h | 45 ++++++++++++++++++++++++++++++++++++++++ xbyak/xbyak_mnemonic.h | 45 ++++++++++++++++++++++++++++++++++++++++ xbyak/xbyak_util.h | 45 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 227 insertions(+) create mode 100644 xbyak/COPYRIGHT diff --git a/xbyak/COPYRIGHT b/xbyak/COPYRIGHT new file mode 100644 index 000000000..78d3140b8 --- /dev/null +++ b/xbyak/COPYRIGHT @@ -0,0 +1,47 @@ + +Copyright (c) 2007 MITSUNARI Shigeo +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. +Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. +Neither the name of the copyright owner nor the names of its contributors may +be used to endorse or promote products derived from this software without +specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +THE POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +ソースコード形式かバイナリ形式か、変更するかしないかを問わず、以下の条件を満た +す場合に限り、再頒布および使用が許可されます。 + +ソースコードを再頒布する場合、上記の著作権表示、本条件一覧、および下記免責条項 +を含めること。 +バイナリ形式で再頒布する場合、頒布物に付属のドキュメント等の資料に、上記の著作 +権表示、本条件一覧、および下記免責条項を含めること。 +書面による特別の許可なしに、本ソフトウェアから派生した製品の宣伝または販売促進 +に、著作権者の名前またはコントリビューターの名前を使用してはならない。 +本ソフトウェアは、著作権者およびコントリビューターによって「現状のまま」提供さ +れており、明示黙示を問わず、商業的な使用可能性、および特定の目的に対する適合性 +に関する暗黙の保証も含め、またそれに限定されない、いかなる保証もありません。 +著作権者もコントリビューターも、事由のいかんを問わず、 損害発生の原因いかんを +問わず、かつ責任の根拠が契約であるか厳格責任であるか(過失その他の)不法行為で +あるかを問わず、仮にそのような損害が発生する可能性を知らされていたとしても、 +本ソフトウェアの使用によって発生した(代替品または代用サービスの調達、使用の +喪失、データの喪失、利益の喪失、業務の中断も含め、またそれに限定されない)直接 +損害、間接損害、偶発的な損害、特別損害、懲罰的損害、または結果損害について、 +一切責任を負わないものとします。 diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index d0cf6f9c3..31aa0a056 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -1,3 +1,48 @@ +/******************************************************************************* +* Copyright 2016-2017 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +/******************************************************************************* +* Copyright (c) 2007 MITSUNARI Shigeo +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* Neither the name of the copyright owner nor the names of its contributors may +* be used to endorse or promote products derived from this software without +* specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +* ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +* THE POSSIBILITY OF SUCH DAMAGE. +*******************************************************************************/ + #pragma once #ifndef XBYAK_XBYAK_H_ #define XBYAK_XBYAK_H_ diff --git a/xbyak/xbyak_bin2hex.h b/xbyak/xbyak_bin2hex.h index 69ecdbfed..54e0d8ff1 100644 --- a/xbyak/xbyak_bin2hex.h +++ b/xbyak/xbyak_bin2hex.h @@ -1,3 +1,48 @@ +/******************************************************************************* +* Copyright 2016-2017 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +/******************************************************************************* +* Copyright (c) 2007 MITSUNARI Shigeo +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* Neither the name of the copyright owner nor the names of its contributors may +* be used to endorse or promote products derived from this software without +* specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +* THE POSSIBILITY OF SUCH DAMAGE. 
+*******************************************************************************/ + enum { B00000000= 0, B00000001= 1, diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index ac5be9600..a781f0c30 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1,3 +1,48 @@ +/******************************************************************************* +* Copyright 2016-2017 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +/******************************************************************************* +* Copyright (c) 2007 MITSUNARI Shigeo +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* Neither the name of the copyright owner nor the names of its contributors may +* be used to endorse or promote products derived from this software without +* specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +* THE POSSIBILITY OF SUCH DAMAGE. +*******************************************************************************/ + const char *getVersionString() const { return "4.87"; } void packssdw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6B); } void packsswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x63); } diff --git a/xbyak/xbyak_util.h b/xbyak/xbyak_util.h index 3a7c2c218..5854a1723 100644 --- a/xbyak/xbyak_util.h +++ b/xbyak/xbyak_util.h @@ -1,3 +1,48 @@ +/******************************************************************************* +* Copyright 2016-2017 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +/******************************************************************************* +* Copyright (c) 2007 MITSUNARI Shigeo +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* Neither the name of the copyright owner nor the names of its contributors may +* be used to endorse or promote products derived from this software without +* specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +* THE POSSIBILITY OF SUCH DAMAGE. +*******************************************************************************/ + #ifndef XBYAK_XBYAK_UTIL_H_ #define XBYAK_XBYAK_UTIL_H_ From c0260a44d8d1bfb34a6005b3fa300d71df5a550f Mon Sep 17 00:00:00 2001 From: Haihao Shen Date: Thu, 13 Jul 2017 18:47:11 +0900 Subject: [PATCH 33/54] Support conv and relu fusion in training path --- src/caffe/layers/mkldnn_relu_layer.cpp | 4 ++ src/caffe/net.cpp | 81 ++++++++++---------------- src/caffe/proto/caffe.proto | 1 + 3 files changed, 36 insertions(+), 50 deletions(-) diff --git a/src/caffe/layers/mkldnn_relu_layer.cpp b/src/caffe/layers/mkldnn_relu_layer.cpp index 8bd5d3ab4..57e9be70d 100644 --- a/src/caffe/layers/mkldnn_relu_layer.cpp +++ b/src/caffe/layers/mkldnn_relu_layer.cpp @@ -70,6 +70,8 @@ void MKLDNNReLULayer::Reshape(const vector*>& bottom template void MKLDNNReLULayer::InitReLUFwd(const vector*>& bottom, const vector*>& top) { + if(this->layer_param_.relu_param().fuse()) return; + if (std::is_same::value) NOT_IMPLEMENTED; auto propagation = this->phase_ == TEST ? 
prop_kind::forward_scoring : prop_kind::forward_training; int32_t n = this->num_; @@ -143,6 +145,8 @@ template void MKLDNNReLULayer::Forward_cpu(const vector*>& bottom ,const vector*>& top) { + if(this->layer_param_.relu_param().fuse()) return; + VLOG(1) << "MKLDNNReLULayer::Forward_cpu: " << this->layer_param_.name(); #ifdef DEBUG LOG(INFO) << "MKLDNNReLULayer::Forward_cpu: " << this->layer_param_.name(); diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 0b0ca8bb1..8795287dc 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -616,26 +616,8 @@ void Net::CompilationRuleTwo(const NetParameter& param, // then we can remove ReLU layer // and rename Convolution top blob after deleted ReLU's top // Note: Currently merging of convolution and relu layers is feasible - // only for caffe::TEST phase, as there is no Backward primitive of conv Relu - // If current layer is Convolution of MKLDNN engine.. - /* - //Old Structure: if ((A == TEST) && (B == 0) && ((C == ConvolutionParameter_Engine_MKLDNN) || ((D == ConvolutionParameter_Engine_DEFAULT) && ((E == 0 && F == string::npos)) || ((G == "" && H == 0 && I == string::npos))))) - //New tmp Structure: if ((A == TEST) && (B == 0) && ((C == ConvolutionParameter_Engine_MKLDNN) || (((D == ConvolutionParameter_Engine_DEFAULT) && ((E == 0 && F == string::npos))) || ((G == "" && H == 0 && I == string::npos))))) - //New Structure: if ((A == TEST) && (B == 0) && ((C == ConvolutionParameter_Engine_MKLDNN) || (((D == ConvolutionParameter_Engine_DEFAULT) && (E == 0 && F == string::npos)) || (G == "" && H == 0 && I == string::npos)))) - //Old Structure: - //if ((A == TEST) && - // (B == 0) && - // ((C == ConvolutionParameter_Engine_MKLDNN) - // || ((D == ConvolutionParameter_Engine_DEFAULT) && - // ((E == 0 - // && F == string::npos)) || - // ((G == "" && - // H == 0 && - // I == string::npos))))) - */ - if ((param.state().phase() == TEST) && - (layer_param->type().compare("Convolution") == 0) && + if ((layer_param->type().compare("Convolution") == 0) && ((layer_param->convolution_param().engine() == ConvolutionParameter_Engine_MKLDNN) || (((layer_param->convolution_param().engine() == ConvolutionParameter_Engine_DEFAULT) && (param.engine().compare(0, 6, "MKLDNN") == 0 @@ -652,20 +634,6 @@ void Net::CompilationRuleTwo(const NetParameter& param, // Consumer layer of blob produced by Conv // has to be ReLU layer with one Input Blob - /* - //Old Structure: if ((A == 0) && ((B == ReLUParameter_Engine_MKLDNN) || ((C == ReLUParameter_Engine_DEFAULT) && ((D == 0 && E == string::npos)) || ((F == "" && G == 0 && H == string::npos))))) - //New tmp Structure: if ((A == 0) && ((B == ReLUParameter_Engine_MKLDNN) || (((C == ReLUParameter_Engine_DEFAULT) && ((D == 0 && E == string::npos))) || ((F == "" && G == 0 && H == string::npos))))) - //New Structure: if ((A == 0) && ((B == ReLUParameter_Engine_MKLDNN) || (((C == ReLUParameter_Engine_DEFAULT) && (D == 0 && E == string::npos)) || (F == "" && G == 0 && H == string::npos)))) - //Old Structure: - //if ((A == 0) && - // ((B == ReLUParameter_Engine_MKLDNN) - // || ((C == ReLUParameter_Engine_DEFAULT) && - // ((D == 0 - // && E == string::npos)) || - // ((F == "" && - // G == 0 && - // H == string::npos))))) - */ if ((consumer_layer_param.type().compare("ReLU") == 0) && ((consumer_layer_param.relu_param().engine() == ReLUParameter_Engine_MKLDNN) || (((consumer_layer_param.relu_param().engine() == ReLUParameter_Engine_DEFAULT) && @@ -676,34 +644,47 @@ void Net::CompilationRuleTwo(const NetParameter& param, 
layer_param->engine().find(":DLA", 6) == string::npos)))) { string& convolution_top_blob_name = const_cast(layer_param->top(0)); - const string& scale_top_blob_name = consumer_layer_param.top(0); - // Mark Consumer layer (its name) as the one marked for dropping - layers_to_drop.insert(consumer_layer_param.name()); - // Replace Convolution top name with ReLU top name - convolution_top_blob_name.resize(scale_top_blob_name.size()); - convolution_top_blob_name.replace(0, - scale_top_blob_name.size(), - scale_top_blob_name); + if(param.state().phase() == TEST) { + const string& scale_top_blob_name = consumer_layer_param.top(0); + // Mark Consumer layer (its name) as the one marked for dropping + layers_to_drop.insert(consumer_layer_param.name()); + + // Replace Convolution top name with ReLU top name + convolution_top_blob_name.resize(scale_top_blob_name.size()); + convolution_top_blob_name.replace(0, + scale_top_blob_name.size(), + scale_top_blob_name); + } // set relu flag in convolution layer_param->mutable_convolution_param()->set_relu(true); float negative_slope1 = consumer_layer_param.relu_param().negative_slope(); layer_param->mutable_convolution_param()-> set_negative_slope(negative_slope1); + + if(param.state().phase() == TRAIN) { + if(i+1 < param.layer_size()) { + LayerParameter* relu_layer_param = + (const_cast(param)).mutable_layer(i+1); + relu_layer_param->mutable_relu_param()->set_fuse(true); + } + } } } - if (layers_to_drop.find(layer_param->name()) != layers_to_drop.end()) { - LOG_IF(INFO, Caffe::root_solver()) << "Dropped layer: " - << layer_param->name() << std::endl; - layer_included = false; - // Remove dropped layer from the list of layers to be dropped - layers_to_drop.erase(layers_to_drop.find(layer_param->name())); - } + if(param.state().phase() == TEST) { + if (layers_to_drop.find(layer_param->name()) != layers_to_drop.end()) { + LOG_IF(INFO, Caffe::root_solver()) << "Dropped layer: " + << layer_param->name() << std::endl; + layer_included = false; + // Remove dropped layer from the list of layers to be dropped + layers_to_drop.erase(layers_to_drop.find(layer_param->name())); + } - if (layer_included) { - param_compiled->add_layer()->CopyFrom(*layer_param); + if (layer_included) { + param_compiled->add_layer()->CopyFrom(*layer_param); + } } } } diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index c4c5228e5..20c32507f 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -1626,6 +1626,7 @@ message ReLUParameter { MKLDNN = 4; } optional Engine engine = 2 [default = DEFAULT]; + optional bool fuse = 3 [default = false]; } message ReshapeParameter { From fa4bc00d3c9f5347897ddbfdf3cad29a5a7d4635 Mon Sep 17 00:00:00 2001 From: Haihao Shen Date: Fri, 14 Jul 2017 16:10:57 +0900 Subject: [PATCH 34/54] Add SSD modified deploy prototxt under MKL2017 and MKLDNN engine --- .../SSD_300x300/deploy_mkl2017.prototxt | 1626 +++++++++++++++++ .../SSD_300x300/deploy_mkldnn.prototxt | 1626 +++++++++++++++++ 2 files changed, 3252 insertions(+) create mode 100644 models/intel_optimized_models/ssd/VGGNet/VOC0712/SSD_300x300/deploy_mkl2017.prototxt create mode 100644 models/intel_optimized_models/ssd/VGGNet/VOC0712/SSD_300x300/deploy_mkldnn.prototxt diff --git a/models/intel_optimized_models/ssd/VGGNet/VOC0712/SSD_300x300/deploy_mkl2017.prototxt b/models/intel_optimized_models/ssd/VGGNet/VOC0712/SSD_300x300/deploy_mkl2017.prototxt new file mode 100644 index 000000000..7e2ddbbbb --- /dev/null +++ 
b/models/intel_optimized_models/ssd/VGGNet/VOC0712/SSD_300x300/deploy_mkl2017.prototxt @@ -0,0 +1,1626 @@ +name: "VGG_VOC0712_SSD_300x300_deploy" +input: "data" +input_shape { + dim: 1 + dim: 3 + dim: 300 + dim: 300 +} +layer { + engine: "MKL2017" + name: "conv1_1" + type: "Convolution" + bottom: "data" + top: "conv1_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "relu1_1" + type: "ReLU" + bottom: "conv1_1" + top: "conv1_1" +} +layer { + engine: "MKL2017" + name: "conv1_2" + type: "Convolution" + bottom: "conv1_1" + top: "conv1_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "relu1_2" + type: "ReLU" + bottom: "conv1_2" + top: "conv1_2" +} +layer { + engine: "MKL2017" + name: "pool1" + type: "Pooling" + bottom: "conv1_2" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + engine: "MKL2017" + name: "conv2_1" + type: "Convolution" + bottom: "pool1" + top: "conv2_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "relu2_1" + type: "ReLU" + bottom: "conv2_1" + top: "conv2_1" +} +layer { + engine: "MKL2017" + name: "conv2_2" + type: "Convolution" + bottom: "conv2_1" + top: "conv2_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "relu2_2" + type: "ReLU" + bottom: "conv2_2" + top: "conv2_2" +} +layer { + engine: "MKL2017" + name: "pool2" + type: "Pooling" + bottom: "conv2_2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + engine: "MKL2017" + name: "conv3_1" + type: "Convolution" + bottom: "pool2" + top: "conv3_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "relu3_1" + type: "ReLU" + bottom: "conv3_1" + top: "conv3_1" +} +layer { + engine: "MKL2017" + name: "conv3_2" + type: "Convolution" + bottom: "conv3_1" + top: "conv3_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "relu3_2" + type: "ReLU" + bottom: "conv3_2" + top: "conv3_2" +} +layer { + engine: "MKL2017" + name: "conv3_3" + type: "Convolution" + bottom: "conv3_2" + top: "conv3_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + 
type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "relu3_3" + type: "ReLU" + bottom: "conv3_3" + top: "conv3_3" +} +layer { + engine: "MKL2017" + name: "pool3" + type: "Pooling" + bottom: "conv3_3" + top: "pool3" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + engine: "MKL2017" + name: "conv4_1" + type: "Convolution" + bottom: "pool3" + top: "conv4_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "relu4_1" + type: "ReLU" + bottom: "conv4_1" + top: "conv4_1" +} +layer { + engine: "MKL2017" + name: "conv4_2" + type: "Convolution" + bottom: "conv4_1" + top: "conv4_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "relu4_2" + type: "ReLU" + bottom: "conv4_2" + top: "conv4_2" +} +layer { + engine: "MKL2017" + name: "conv4_3" + type: "Convolution" + bottom: "conv4_2" + top: "conv4_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "relu4_3" + type: "ReLU" + bottom: "conv4_3" + top: "conv4_3" +} +layer { + engine: "MKL2017" + name: "pool4" + type: "Pooling" + bottom: "conv4_3" + top: "pool4" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + engine: "MKL2017" + name: "conv5_1" + type: "Convolution" + bottom: "pool4" + top: "conv5_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + dilation: 1 + } +} +layer { + engine: "MKL2017" + name: "relu5_1" + type: "ReLU" + bottom: "conv5_1" + top: "conv5_1" +} +layer { + engine: "MKL2017" + name: "conv5_2" + type: "Convolution" + bottom: "conv5_1" + top: "conv5_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + dilation: 1 + } +} +layer { + engine: "MKL2017" + name: "relu5_2" + type: "ReLU" + bottom: "conv5_2" + top: "conv5_2" +} +layer { + engine: "MKL2017" + name: "conv5_3" + type: "Convolution" + bottom: "conv5_2" + top: "conv5_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + dilation: 1 + } +} +layer { + engine: "MKL2017" + name: "relu5_3" + type: "ReLU" + bottom: "conv5_3" + top: "conv5_3" +} +layer { + engine: "MKL2017" + name: "pool5" + type: "Pooling" + bottom: "conv5_3" + top: "pool5" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + engine: "MKL2017" + name: "fc6" + type: "Convolution" + bottom: "pool5" + top: "fc6" + 
param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 1024 + pad: 6 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + dilation: 6 + } +} +layer { + engine: "MKL2017" + name: "relu6" + type: "ReLU" + bottom: "fc6" + top: "fc6" +} +layer { + engine: "MKL2017" + name: "fc7" + type: "Convolution" + bottom: "fc6" + top: "fc7" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 1024 + kernel_size: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "relu7" + type: "ReLU" + bottom: "fc7" + top: "fc7" +} +layer { + engine: "MKL2017" + name: "conv6_1" + type: "Convolution" + bottom: "fc7" + top: "conv6_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "conv6_1_relu" + type: "ReLU" + bottom: "conv6_1" + top: "conv6_1" +} +layer { + engine: "MKL2017" + name: "conv6_2" + type: "Convolution" + bottom: "conv6_1" + top: "conv6_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "conv6_2_relu" + type: "ReLU" + bottom: "conv6_2" + top: "conv6_2" +} +layer { + engine: "MKL2017" + name: "conv7_1" + type: "Convolution" + bottom: "conv6_2" + top: "conv7_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "conv7_1_relu" + type: "ReLU" + bottom: "conv7_1" + top: "conv7_1" +} +layer { + engine: "MKL2017" + name: "conv7_2" + type: "Convolution" + bottom: "conv7_1" + top: "conv7_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "conv7_2_relu" + type: "ReLU" + bottom: "conv7_2" + top: "conv7_2" +} +layer { + engine: "MKL2017" + name: "conv8_1" + type: "Convolution" + bottom: "conv7_2" + top: "conv8_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "conv8_1_relu" + type: "ReLU" + bottom: "conv8_1" + top: "conv8_1" +} +layer { + engine: "MKL2017" + name: "conv8_2" + type: "Convolution" + bottom: "conv8_1" + top: "conv8_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 0 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: 
"conv8_2_relu" + type: "ReLU" + bottom: "conv8_2" + top: "conv8_2" +} +layer { + engine: "MKL2017" + name: "conv9_1" + type: "Convolution" + bottom: "conv8_2" + top: "conv9_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "conv9_1_relu" + type: "ReLU" + bottom: "conv9_1" + top: "conv9_1" +} +layer { + engine: "MKL2017" + name: "conv9_2" + type: "Convolution" + bottom: "conv9_1" + top: "conv9_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 0 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "conv9_2_relu" + type: "ReLU" + bottom: "conv9_2" + top: "conv9_2" +} +layer { + name: "conv4_3_norm" + type: "Normalize" + bottom: "conv4_3" + top: "conv4_3_norm" + norm_param { + across_spatial: false + scale_filler { + type: "constant" + value: 20 + } + channel_shared: false + } +} +layer { + name: "conv4_3_norm_mbox_loc" + type: "Convolution" + bottom: "conv4_3_norm" + top: "conv4_3_norm_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv4_3_norm_mbox_loc_perm" + type: "Permute" + bottom: "conv4_3_norm_mbox_loc" + top: "conv4_3_norm_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv4_3_norm_mbox_loc_flat" + type: "Flatten" + bottom: "conv4_3_norm_mbox_loc_perm" + top: "conv4_3_norm_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv4_3_norm_mbox_conf" + type: "Convolution" + bottom: "conv4_3_norm" + top: "conv4_3_norm_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 84 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv4_3_norm_mbox_conf_perm" + type: "Permute" + bottom: "conv4_3_norm_mbox_conf" + top: "conv4_3_norm_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv4_3_norm_mbox_conf_flat" + type: "Flatten" + bottom: "conv4_3_norm_mbox_conf_perm" + top: "conv4_3_norm_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv4_3_norm_mbox_priorbox" + type: "PriorBox" + bottom: "conv4_3_norm" + bottom: "data" + top: "conv4_3_norm_mbox_priorbox" + prior_box_param { + min_size: 30.0 + max_size: 60.0 + aspect_ratio: 2 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 8 + offset: 0.5 + } +} +layer { + engine: "MKL2017" + name: "fc7_mbox_loc" + type: "Convolution" + bottom: "fc7" + top: "fc7_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "fc7_mbox_loc_perm" + type: "Permute" + bottom: "fc7_mbox_loc" + top: 
"fc7_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "fc7_mbox_loc_flat" + type: "Flatten" + bottom: "fc7_mbox_loc_perm" + top: "fc7_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "fc7_mbox_conf" + type: "Convolution" + bottom: "fc7" + top: "fc7_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 126 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "fc7_mbox_conf_perm" + type: "Permute" + bottom: "fc7_mbox_conf" + top: "fc7_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "fc7_mbox_conf_flat" + type: "Flatten" + bottom: "fc7_mbox_conf_perm" + top: "fc7_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "fc7_mbox_priorbox" + type: "PriorBox" + bottom: "fc7" + bottom: "data" + top: "fc7_mbox_priorbox" + prior_box_param { + min_size: 60.0 + max_size: 111.0 + aspect_ratio: 2 + aspect_ratio: 3 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 16 + offset: 0.5 + } +} +layer { + engine: "MKL2017" + name: "conv6_2_mbox_loc" + type: "Convolution" + bottom: "conv6_2" + top: "conv6_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv6_2_mbox_loc_perm" + type: "Permute" + bottom: "conv6_2_mbox_loc" + top: "conv6_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv6_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv6_2_mbox_loc_perm" + top: "conv6_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv6_2_mbox_conf" + type: "Convolution" + bottom: "conv6_2" + top: "conv6_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 126 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv6_2_mbox_conf_perm" + type: "Permute" + bottom: "conv6_2_mbox_conf" + top: "conv6_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv6_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv6_2_mbox_conf_perm" + top: "conv6_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv6_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv6_2" + bottom: "data" + top: "conv6_2_mbox_priorbox" + prior_box_param { + min_size: 111.0 + max_size: 162.0 + aspect_ratio: 2 + aspect_ratio: 3 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 32 + offset: 0.5 + } +} +layer { + name: "conv7_2_mbox_loc" + type: "Convolution" + bottom: "conv7_2" + top: "conv7_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv7_2_mbox_loc_perm" + type: "Permute" + bottom: "conv7_2_mbox_loc" + top: "conv7_2_mbox_loc_perm" + permute_param { + order: 0 + 
order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv7_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv7_2_mbox_loc_perm" + top: "conv7_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + engine: "MKL2017" + name: "conv7_2_mbox_conf" + type: "Convolution" + bottom: "conv7_2" + top: "conv7_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 126 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv7_2_mbox_conf_perm" + type: "Permute" + bottom: "conv7_2_mbox_conf" + top: "conv7_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv7_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv7_2_mbox_conf_perm" + top: "conv7_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv7_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv7_2" + bottom: "data" + top: "conv7_2_mbox_priorbox" + prior_box_param { + min_size: 162.0 + max_size: 213.0 + aspect_ratio: 2 + aspect_ratio: 3 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 64 + offset: 0.5 + } +} +layer { + engine: "MKL2017" + name: "conv8_2_mbox_loc" + type: "Convolution" + bottom: "conv8_2" + top: "conv8_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv8_2_mbox_loc_perm" + type: "Permute" + bottom: "conv8_2_mbox_loc" + top: "conv8_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv8_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv8_2_mbox_loc_perm" + top: "conv8_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + engine: "MKL2017" + name: "conv8_2_mbox_conf" + type: "Convolution" + bottom: "conv8_2" + top: "conv8_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 84 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv8_2_mbox_conf_perm" + type: "Permute" + bottom: "conv8_2_mbox_conf" + top: "conv8_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv8_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv8_2_mbox_conf_perm" + top: "conv8_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv8_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv8_2" + bottom: "data" + top: "conv8_2_mbox_priorbox" + prior_box_param { + min_size: 213.0 + max_size: 264.0 + aspect_ratio: 2 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 100 + offset: 0.5 + } +} +layer { + name: "conv9_2_mbox_loc" + type: "Convolution" + bottom: "conv9_2" + top: "conv9_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv9_2_mbox_loc_perm" + type: "Permute" + bottom: "conv9_2_mbox_loc" + top: "conv9_2_mbox_loc_perm" 
+ permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv9_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv9_2_mbox_loc_perm" + top: "conv9_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + engine: "MKL2017" + name: "conv9_2_mbox_conf" + type: "Convolution" + bottom: "conv9_2" + top: "conv9_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 84 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv9_2_mbox_conf_perm" + type: "Permute" + bottom: "conv9_2_mbox_conf" + top: "conv9_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv9_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv9_2_mbox_conf_perm" + top: "conv9_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv9_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv9_2" + bottom: "data" + top: "conv9_2_mbox_priorbox" + prior_box_param { + min_size: 264.0 + max_size: 315.0 + aspect_ratio: 2 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 300 + offset: 0.5 + } +} +layer { + name: "mbox_loc" + type: "Concat" + bottom: "conv4_3_norm_mbox_loc_flat" + bottom: "fc7_mbox_loc_flat" + bottom: "conv6_2_mbox_loc_flat" + bottom: "conv7_2_mbox_loc_flat" + bottom: "conv8_2_mbox_loc_flat" + bottom: "conv9_2_mbox_loc_flat" + top: "mbox_loc" + concat_param { + axis: 1 + } + engine: "CAFFE" +} +layer { + name: "mbox_conf" + type: "Concat" + bottom: "conv4_3_norm_mbox_conf_flat" + bottom: "fc7_mbox_conf_flat" + bottom: "conv6_2_mbox_conf_flat" + bottom: "conv7_2_mbox_conf_flat" + bottom: "conv8_2_mbox_conf_flat" + bottom: "conv9_2_mbox_conf_flat" + top: "mbox_conf" + concat_param { + axis: 1 + } + engine: "CAFFE" +} +layer { + name: "mbox_priorbox" + type: "Concat" + bottom: "conv4_3_norm_mbox_priorbox" + bottom: "fc7_mbox_priorbox" + bottom: "conv6_2_mbox_priorbox" + bottom: "conv7_2_mbox_priorbox" + bottom: "conv8_2_mbox_priorbox" + bottom: "conv9_2_mbox_priorbox" + top: "mbox_priorbox" + concat_param { + axis: 2 + } + engine: "CAFFE" +} diff --git a/models/intel_optimized_models/ssd/VGGNet/VOC0712/SSD_300x300/deploy_mkldnn.prototxt b/models/intel_optimized_models/ssd/VGGNet/VOC0712/SSD_300x300/deploy_mkldnn.prototxt new file mode 100644 index 000000000..754549d27 --- /dev/null +++ b/models/intel_optimized_models/ssd/VGGNet/VOC0712/SSD_300x300/deploy_mkldnn.prototxt @@ -0,0 +1,1626 @@ +name: "VGG_VOC0712_SSD_300x300_deploy" +input: "data" +input_shape { + dim: 1 + dim: 3 + dim: 300 + dim: 300 +} +layer { + engine: "MKLDNN" + name: "conv1_1" + type: "Convolution" + bottom: "data" + top: "conv1_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "relu1_1" + type: "ReLU" + bottom: "conv1_1" + top: "conv1_1" +} +layer { + engine: "MKLDNN" + name: "conv1_2" + type: "Convolution" + bottom: "conv1_1" + top: "conv1_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + 
engine: "MKLDNN" + name: "relu1_2" + type: "ReLU" + bottom: "conv1_2" + top: "conv1_2" +} +layer { + engine: "MKLDNN" + name: "pool1" + type: "Pooling" + bottom: "conv1_2" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + engine: "MKLDNN" + name: "conv2_1" + type: "Convolution" + bottom: "pool1" + top: "conv2_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "relu2_1" + type: "ReLU" + bottom: "conv2_1" + top: "conv2_1" +} +layer { + engine: "MKLDNN" + name: "conv2_2" + type: "Convolution" + bottom: "conv2_1" + top: "conv2_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "relu2_2" + type: "ReLU" + bottom: "conv2_2" + top: "conv2_2" +} +layer { + engine: "MKLDNN" + name: "pool2" + type: "Pooling" + bottom: "conv2_2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + engine: "MKLDNN" + name: "conv3_1" + type: "Convolution" + bottom: "pool2" + top: "conv3_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "relu3_1" + type: "ReLU" + bottom: "conv3_1" + top: "conv3_1" +} +layer { + engine: "MKLDNN" + name: "conv3_2" + type: "Convolution" + bottom: "conv3_1" + top: "conv3_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "relu3_2" + type: "ReLU" + bottom: "conv3_2" + top: "conv3_2" +} +layer { + engine: "MKLDNN" + name: "conv3_3" + type: "Convolution" + bottom: "conv3_2" + top: "conv3_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "relu3_3" + type: "ReLU" + bottom: "conv3_3" + top: "conv3_3" +} +layer { + engine: "MKLDNN" + name: "pool3" + type: "Pooling" + bottom: "conv3_3" + top: "pool3" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + engine: "MKLDNN" + name: "conv4_1" + type: "Convolution" + bottom: "pool3" + top: "conv4_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "relu4_1" + type: "ReLU" + bottom: "conv4_1" + top: "conv4_1" +} +layer { + engine: "MKLDNN" + name: "conv4_2" + type: "Convolution" + bottom: "conv4_1" + top: "conv4_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 
+ weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "relu4_2" + type: "ReLU" + bottom: "conv4_2" + top: "conv4_2" +} +layer { + engine: "MKLDNN" + name: "conv4_3" + type: "Convolution" + bottom: "conv4_2" + top: "conv4_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "relu4_3" + type: "ReLU" + bottom: "conv4_3" + top: "conv4_3" +} +layer { + engine: "MKLDNN" + name: "pool4" + type: "Pooling" + bottom: "conv4_3" + top: "pool4" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + engine: "MKLDNN" + name: "conv5_1" + type: "Convolution" + bottom: "pool4" + top: "conv5_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + dilation: 1 + } +} +layer { + engine: "MKLDNN" + name: "relu5_1" + type: "ReLU" + bottom: "conv5_1" + top: "conv5_1" +} +layer { + engine: "MKLDNN" + name: "conv5_2" + type: "Convolution" + bottom: "conv5_1" + top: "conv5_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + dilation: 1 + } +} +layer { + engine: "MKLDNN" + name: "relu5_2" + type: "ReLU" + bottom: "conv5_2" + top: "conv5_2" +} +layer { + engine: "MKLDNN" + name: "conv5_3" + type: "Convolution" + bottom: "conv5_2" + top: "conv5_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + dilation: 1 + } +} +layer { + engine: "MKLDNN" + name: "relu5_3" + type: "ReLU" + bottom: "conv5_3" + top: "conv5_3" +} +layer { + engine: "MKLDNN" + name: "pool5" + type: "Pooling" + bottom: "conv5_3" + top: "pool5" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + engine: "MKLDNN" + name: "fc6" + type: "Convolution" + bottom: "pool5" + top: "fc6" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 1024 + pad: 6 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + dilation: 6 + } +} +layer { + engine: "MKLDNN" + name: "relu6" + type: "ReLU" + bottom: "fc6" + top: "fc6" +} +layer { + engine: "MKLDNN" + name: "fc7" + type: "Convolution" + bottom: "fc6" + top: "fc7" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 1024 + kernel_size: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "relu7" + type: "ReLU" + bottom: "fc7" + top: "fc7" +} +layer { + engine: "MKLDNN" + name: "conv6_1" + type: "Convolution" + bottom: "fc7" + top: "conv6_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: 
"xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "conv6_1_relu" + type: "ReLU" + bottom: "conv6_1" + top: "conv6_1" +} +layer { + engine: "MKLDNN" + name: "conv6_2" + type: "Convolution" + bottom: "conv6_1" + top: "conv6_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "conv6_2_relu" + type: "ReLU" + bottom: "conv6_2" + top: "conv6_2" +} +layer { + engine: "MKLDNN" + name: "conv7_1" + type: "Convolution" + bottom: "conv6_2" + top: "conv7_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "conv7_1_relu" + type: "ReLU" + bottom: "conv7_1" + top: "conv7_1" +} +layer { + engine: "MKLDNN" + name: "conv7_2" + type: "Convolution" + bottom: "conv7_1" + top: "conv7_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "conv7_2_relu" + type: "ReLU" + bottom: "conv7_2" + top: "conv7_2" +} +layer { + engine: "MKLDNN" + name: "conv8_1" + type: "Convolution" + bottom: "conv7_2" + top: "conv8_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "conv8_1_relu" + type: "ReLU" + bottom: "conv8_1" + top: "conv8_1" +} +layer { + engine: "MKLDNN" + name: "conv8_2" + type: "Convolution" + bottom: "conv8_1" + top: "conv8_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 0 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "conv8_2_relu" + type: "ReLU" + bottom: "conv8_2" + top: "conv8_2" +} +layer { + engine: "MKLDNN" + name: "conv9_1" + type: "Convolution" + bottom: "conv8_2" + top: "conv9_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "conv9_1_relu" + type: "ReLU" + bottom: "conv9_1" + top: "conv9_1" +} +layer { + engine: "MKLDNN" + name: "conv9_2" + type: "Convolution" + bottom: "conv9_1" + top: "conv9_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 0 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "conv9_2_relu" + type: "ReLU" + bottom: "conv9_2" + top: "conv9_2" +} +layer { + name: "conv4_3_norm" + type: "Normalize" + bottom: "conv4_3" + top: 
"conv4_3_norm" + norm_param { + across_spatial: false + scale_filler { + type: "constant" + value: 20 + } + channel_shared: false + } +} +layer { + name: "conv4_3_norm_mbox_loc" + type: "Convolution" + bottom: "conv4_3_norm" + top: "conv4_3_norm_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv4_3_norm_mbox_loc_perm" + type: "Permute" + bottom: "conv4_3_norm_mbox_loc" + top: "conv4_3_norm_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv4_3_norm_mbox_loc_flat" + type: "Flatten" + bottom: "conv4_3_norm_mbox_loc_perm" + top: "conv4_3_norm_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv4_3_norm_mbox_conf" + type: "Convolution" + bottom: "conv4_3_norm" + top: "conv4_3_norm_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 84 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv4_3_norm_mbox_conf_perm" + type: "Permute" + bottom: "conv4_3_norm_mbox_conf" + top: "conv4_3_norm_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv4_3_norm_mbox_conf_flat" + type: "Flatten" + bottom: "conv4_3_norm_mbox_conf_perm" + top: "conv4_3_norm_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv4_3_norm_mbox_priorbox" + type: "PriorBox" + bottom: "conv4_3_norm" + bottom: "data" + top: "conv4_3_norm_mbox_priorbox" + prior_box_param { + min_size: 30.0 + max_size: 60.0 + aspect_ratio: 2 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 8 + offset: 0.5 + } +} +layer { + engine: "MKLDNN" + name: "fc7_mbox_loc" + type: "Convolution" + bottom: "fc7" + top: "fc7_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "fc7_mbox_loc_perm" + type: "Permute" + bottom: "fc7_mbox_loc" + top: "fc7_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "fc7_mbox_loc_flat" + type: "Flatten" + bottom: "fc7_mbox_loc_perm" + top: "fc7_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "fc7_mbox_conf" + type: "Convolution" + bottom: "fc7" + top: "fc7_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 126 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "fc7_mbox_conf_perm" + type: "Permute" + bottom: "fc7_mbox_conf" + top: "fc7_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "fc7_mbox_conf_flat" + type: "Flatten" + bottom: "fc7_mbox_conf_perm" + top: "fc7_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "fc7_mbox_priorbox" + type: "PriorBox" + bottom: "fc7" + bottom: "data" + top: "fc7_mbox_priorbox" + prior_box_param { + min_size: 60.0 + max_size: 111.0 + aspect_ratio: 
2 + aspect_ratio: 3 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 16 + offset: 0.5 + } +} +layer { + engine: "MKLDNN" + name: "conv6_2_mbox_loc" + type: "Convolution" + bottom: "conv6_2" + top: "conv6_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv6_2_mbox_loc_perm" + type: "Permute" + bottom: "conv6_2_mbox_loc" + top: "conv6_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv6_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv6_2_mbox_loc_perm" + top: "conv6_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv6_2_mbox_conf" + type: "Convolution" + bottom: "conv6_2" + top: "conv6_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 126 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv6_2_mbox_conf_perm" + type: "Permute" + bottom: "conv6_2_mbox_conf" + top: "conv6_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv6_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv6_2_mbox_conf_perm" + top: "conv6_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv6_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv6_2" + bottom: "data" + top: "conv6_2_mbox_priorbox" + prior_box_param { + min_size: 111.0 + max_size: 162.0 + aspect_ratio: 2 + aspect_ratio: 3 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 32 + offset: 0.5 + } +} +layer { + name: "conv7_2_mbox_loc" + type: "Convolution" + bottom: "conv7_2" + top: "conv7_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv7_2_mbox_loc_perm" + type: "Permute" + bottom: "conv7_2_mbox_loc" + top: "conv7_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv7_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv7_2_mbox_loc_perm" + top: "conv7_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + engine: "MKLDNN" + name: "conv7_2_mbox_conf" + type: "Convolution" + bottom: "conv7_2" + top: "conv7_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 126 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv7_2_mbox_conf_perm" + type: "Permute" + bottom: "conv7_2_mbox_conf" + top: "conv7_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv7_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv7_2_mbox_conf_perm" + top: "conv7_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv7_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv7_2" + bottom: "data" + top: "conv7_2_mbox_priorbox" + prior_box_param { + min_size: 162.0 + 
max_size: 213.0 + aspect_ratio: 2 + aspect_ratio: 3 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 64 + offset: 0.5 + } +} +layer { + engine: "MKLDNN" + name: "conv8_2_mbox_loc" + type: "Convolution" + bottom: "conv8_2" + top: "conv8_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv8_2_mbox_loc_perm" + type: "Permute" + bottom: "conv8_2_mbox_loc" + top: "conv8_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv8_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv8_2_mbox_loc_perm" + top: "conv8_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + engine: "MKLDNN" + name: "conv8_2_mbox_conf" + type: "Convolution" + bottom: "conv8_2" + top: "conv8_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 84 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv8_2_mbox_conf_perm" + type: "Permute" + bottom: "conv8_2_mbox_conf" + top: "conv8_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv8_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv8_2_mbox_conf_perm" + top: "conv8_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv8_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv8_2" + bottom: "data" + top: "conv8_2_mbox_priorbox" + prior_box_param { + min_size: 213.0 + max_size: 264.0 + aspect_ratio: 2 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 100 + offset: 0.5 + } +} +layer { + name: "conv9_2_mbox_loc" + type: "Convolution" + bottom: "conv9_2" + top: "conv9_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv9_2_mbox_loc_perm" + type: "Permute" + bottom: "conv9_2_mbox_loc" + top: "conv9_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv9_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv9_2_mbox_loc_perm" + top: "conv9_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + engine: "MKLDNN" + name: "conv9_2_mbox_conf" + type: "Convolution" + bottom: "conv9_2" + top: "conv9_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 84 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv9_2_mbox_conf_perm" + type: "Permute" + bottom: "conv9_2_mbox_conf" + top: "conv9_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv9_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv9_2_mbox_conf_perm" + top: "conv9_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv9_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv9_2" + bottom: "data" + top: "conv9_2_mbox_priorbox" + 
prior_box_param { + min_size: 264.0 + max_size: 315.0 + aspect_ratio: 2 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 300 + offset: 0.5 + } +} +layer { + name: "mbox_loc" + type: "Concat" + bottom: "conv4_3_norm_mbox_loc_flat" + bottom: "fc7_mbox_loc_flat" + bottom: "conv6_2_mbox_loc_flat" + bottom: "conv7_2_mbox_loc_flat" + bottom: "conv8_2_mbox_loc_flat" + bottom: "conv9_2_mbox_loc_flat" + top: "mbox_loc" + concat_param { + axis: 1 + } + engine: "CAFFE" +} +layer { + name: "mbox_conf" + type: "Concat" + bottom: "conv4_3_norm_mbox_conf_flat" + bottom: "fc7_mbox_conf_flat" + bottom: "conv6_2_mbox_conf_flat" + bottom: "conv7_2_mbox_conf_flat" + bottom: "conv8_2_mbox_conf_flat" + bottom: "conv9_2_mbox_conf_flat" + top: "mbox_conf" + concat_param { + axis: 1 + } + engine: "CAFFE" +} +layer { + name: "mbox_priorbox" + type: "Concat" + bottom: "conv4_3_norm_mbox_priorbox" + bottom: "fc7_mbox_priorbox" + bottom: "conv6_2_mbox_priorbox" + bottom: "conv7_2_mbox_priorbox" + bottom: "conv8_2_mbox_priorbox" + bottom: "conv9_2_mbox_priorbox" + top: "mbox_priorbox" + concat_param { + axis: 2 + } + engine: "CAFFE" +} From e1f459de94f7754c651f8ff5f5a70c3d5e06e84c Mon Sep 17 00:00:00 2001 From: Haihao Shen Date: Fri, 14 Jul 2017 16:07:44 +0900 Subject: [PATCH 35/54] Fix the bug of conv+relu fusion --- src/caffe/layers/mkldnn_relu_layer.cpp | 5 ++++- src/caffe/net.cpp | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/caffe/layers/mkldnn_relu_layer.cpp b/src/caffe/layers/mkldnn_relu_layer.cpp index 57e9be70d..2cdb5fcdb 100644 --- a/src/caffe/layers/mkldnn_relu_layer.cpp +++ b/src/caffe/layers/mkldnn_relu_layer.cpp @@ -145,7 +145,10 @@ template void MKLDNNReLULayer::Forward_cpu(const vector*>& bottom ,const vector*>& top) { - if(this->layer_param_.relu_param().fuse()) return; + if(this->layer_param_.relu_param().fuse()) { + top[0]->ShareData(*bottom[0]); + return; + } VLOG(1) << "MKLDNNReLULayer::Forward_cpu: " << this->layer_param_.name(); #ifdef DEBUG diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 8795287dc..a0e8a08fe 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -681,10 +681,10 @@ void Net::CompilationRuleTwo(const NetParameter& param, // Remove dropped layer from the list of layers to be dropped layers_to_drop.erase(layers_to_drop.find(layer_param->name())); } + } - if (layer_included) { - param_compiled->add_layer()->CopyFrom(*layer_param); - } + if (layer_included) { + param_compiled->add_layer()->CopyFrom(*layer_param); } } } From d34dcba5cd79f3cdc9803058de1566410c20f4b7 Mon Sep 17 00:00:00 2001 From: xinanlin Date: Fri, 14 Jul 2017 17:07:38 +0800 Subject: [PATCH 36/54] parallelize cmake MKLDNN Build Change-Id: Ie84f75e3db46686470488d712cf017719939b519 --- cmake/MKLDNN.cmake | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/cmake/MKLDNN.cmake b/cmake/MKLDNN.cmake index 97000b7a5..43c51f7ee 100644 --- a/cmake/MKLDNN.cmake +++ b/cmake/MKLDNN.cmake @@ -8,7 +8,14 @@ function(Download_MKLDNN) execute_process(COMMAND cat mkldnn.commit WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE MKLDNN_COMMIT) - + + include(ProcessorCount) + ProcessorCount(NCORE) + if(NOT NCORE EQUAL 0) + set(CTEST_BUILD_FLAGS -j${NCORE}) + set(ctest_test_args ${ctest_test_args} PARALLEL_LEVEL ${NCORE}) + endif() + ExternalProject_add(MKLDNN_Build SOURCE_DIR ${MKLDNN_SOURCE_DIR} CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} -DMKLROOT=${MKL_ROOT_DIR} @@ 
-20,7 +27,7 @@ function(Download_MKLDNN) BUILD_COMMAND cmake ${MKLDNN_SOURCE_DIR} #--Install step INSTALL_DIR ${MKLDNN_INSTALL_DIR} - INSTALL_COMMAND make install + INSTALL_COMMAND make install -j${NCORE} LOG_CONFIGURE 1 LOG_BUILD 1 LOG_INSTALL 1 From 48fde47a183fe84c9f481a39b9c51308935be0f6 Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Sun, 16 Jul 2017 22:36:37 +0800 Subject: [PATCH 37/54] Warning removal for mkldnn relu: mkl-dnn api relu deprecation. --- src/caffe/layers/mkldnn_relu_layer.cpp | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/caffe/layers/mkldnn_relu_layer.cpp b/src/caffe/layers/mkldnn_relu_layer.cpp index 2cdb5fcdb..2437940d3 100644 --- a/src/caffe/layers/mkldnn_relu_layer.cpp +++ b/src/caffe/layers/mkldnn_relu_layer.cpp @@ -70,8 +70,6 @@ void MKLDNNReLULayer::Reshape(const vector*>& bottom template void MKLDNNReLULayer::InitReLUFwd(const vector*>& bottom, const vector*>& top) { - if(this->layer_param_.relu_param().fuse()) return; - if (std::is_same::value) NOT_IMPLEMENTED; auto propagation = this->phase_ == TEST ? prop_kind::forward_scoring : prop_kind::forward_training; int32_t n = this->num_; @@ -101,7 +99,10 @@ void MKLDNNReLULayer::InitReLUFwd(const vector*>& bottom, con top_data_md = bottom_data_md; // ---- Initialize relu primitive descriptor ------------- - relu_forward::desc reluFwd_desc(propagation, *bottom_data_md, negative_slope); + //relu_forward::desc reluFwd_desc(propagation, *bottom_data_md, negative_slope); + // MKLDNN is deprecating standalone relu primitive in MKL-DNN. + // Now MKLDNN has eltwise primitive with eltwise_relu algorithm inside. + eltwise_forward::desc eltwise_reluFwd_desc(propagation, eltwise_relu, *bottom_data_md, negative_slope); // ---- Determining engine to use ----------------------- std::string subengines = this->layer_param_.engine(); @@ -111,7 +112,7 @@ void MKLDNNReLULayer::InitReLUFwd(const vector*>& bottom, con unsigned subEngineIndex = 0; for(; subEngineIndex < ep.getNumberOfSubEngines(); subEngineIndex++) { try { - reluFwd_pd.reset(new relu_forward::primitive_desc(reluFwd_desc, + reluFwd_pd.reset(new relu_forward::primitive_desc(eltwise_reluFwd_desc, ep.getMKLDNNSubEngine(subEngineIndex))); } catch(...) { @@ -145,11 +146,6 @@ template void MKLDNNReLULayer::Forward_cpu(const vector*>& bottom ,const vector*>& top) { - if(this->layer_param_.relu_param().fuse()) { - top[0]->ShareData(*bottom[0]); - return; - } - VLOG(1) << "MKLDNNReLULayer::Forward_cpu: " << this->layer_param_.name(); #ifdef DEBUG LOG(INFO) << "MKLDNNReLULayer::Forward_cpu: " << this->layer_param_.name(); @@ -250,7 +246,10 @@ void MKLDNNReLULayer::InitReLUBwd(const vector*>& top bottom_diff_md = top_diff_md; // ---- Initialize relu primitive descriptor ------------- - relu_backward::desc reluBwd_desc(*top_diff_md, *top_data_md, negative_slope); + //relu_backward::desc reluBwd_desc(*top_diff_md, *top_data_md, negative_slope); + // MKLDNN is deprecating standalone relu primitive in MKL-DNN. + // Now MKLDNN has eltwise primitive with eltwise_relu algorithm inside. 
+ eltwise_backward::desc eltwise_reluBwd_desc(eltwise_relu, *top_diff_md, *top_data_md, negative_slope); // ---- Determining engine to use ----------------------- std::string subengines = this->layer_param_.engine(); @@ -260,7 +259,7 @@ void MKLDNNReLULayer::InitReLUBwd(const vector*>& top unsigned subEngineIndex = 0; for(; subEngineIndex < ep.getNumberOfSubEngines(); subEngineIndex++) { try { - reluBwd_pd.reset(new relu_backward::primitive_desc(reluBwd_desc, + reluBwd_pd.reset(new relu_backward::primitive_desc(eltwise_reluBwd_desc, ep.getMKLDNNSubEngine(subEngineIndex), *reluFwd_pd)); } catch(...) { From 98a2897639f3e5a75693a4b90978783e54ca43bc Mon Sep 17 00:00:00 2001 From: fzou1 Date: Mon, 17 Jul 2017 14:08:59 +0800 Subject: [PATCH 38/54] ICL-125: support input layer in multi-node training --- src/caffe/net.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index a0e8a08fe..a4224f9ba 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -267,6 +267,8 @@ void Net::Init(const NetParameter& in_param) { batch_size = layer_param.memory_data_param().batch_size(); else if (!layer_param.type().compare("WindowData")) batch_size = layer_param.window_data_param().batch_size(); + else if (!layer_param.type().compare("Input")) + batch_size = layer_param.input_param().shape(0).dim(0); if (caffe::TRAIN == param.state().phase()) { LOG(WARNING) << "SetMinibatchSize " << batch_size; From 0dd673a85972667f646f9db95f3cb65b323d8dba Mon Sep 17 00:00:00 2001 From: Haihao Shen Date: Mon, 17 Jul 2017 11:09:06 +0800 Subject: [PATCH 39/54] Fix the conv + relu in training path with relu_fwd_pd initialized --- src/caffe/layers/mkldnn_relu_layer.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/caffe/layers/mkldnn_relu_layer.cpp b/src/caffe/layers/mkldnn_relu_layer.cpp index 2437940d3..9626fdc25 100644 --- a/src/caffe/layers/mkldnn_relu_layer.cpp +++ b/src/caffe/layers/mkldnn_relu_layer.cpp @@ -154,6 +154,11 @@ void MKLDNNReLULayer::Forward_cpu(const vector*>& bottom bool inplace = (bottom[0] == top[0]); if( reluFwd_pd == NULL) InitReLUFwd(bottom, top); + + if(this->layer_param_.relu_param().fuse()) { + top[0]->ShareData(*bottom[0]); + return; + } // making reorders if needed. 
fwd_bottom_data->sync_before_read(); // update top that head at prv From 4ed1c3357eceffbe902a210c6ef3c900fc9bd920 Mon Sep 17 00:00:00 2001 From: "Gong, Jiong" Date: Tue, 18 Jul 2017 00:15:32 +0800 Subject: [PATCH 40/54] Support data augmentation with random resizing --- include/caffe/data_transformer.hpp | 5 ++ src/caffe/data_transformer.cpp | 74 ++++++++++++++++++++++++------ src/caffe/proto/caffe.proto | 8 ++++ 3 files changed, 74 insertions(+), 13 deletions(-) diff --git a/include/caffe/data_transformer.hpp b/include/caffe/data_transformer.hpp index d95df5a0c..f709a281b 100644 --- a/include/caffe/data_transformer.hpp +++ b/include/caffe/data_transformer.hpp @@ -396,6 +396,11 @@ class DataTransformer { bool has_mean_values> void Transform(const Datum& datum, Dtype* transformed_data, NormalizedBBox* crop_bbox, RandNumbers& rand_num); + +#ifdef USE_OPENCV + void RandomResizeImage(const Datum& datum, Datum *resized_datum); + void RandomResizeImage(const cv::Mat& img, cv::Mat *resized_img); +#endif }; } // namespace caffe diff --git a/src/caffe/data_transformer.cpp b/src/caffe/data_transformer.cpp index 7f4fbc830..5a1ae5511 100644 --- a/src/caffe/data_transformer.cpp +++ b/src/caffe/data_transformer.cpp @@ -172,14 +172,24 @@ void DataTransformer::Transform(const Datum& datum, template template -void DataTransformer::Transform(const Datum& datum, +void DataTransformer::Transform(const Datum& datum_in, Dtype* transformed_data, NormalizedBBox* crop_bbox, RandNumbers& rand_num) { - const string& data = datum.data(); - const int datum_channels = datum.channels(); - const int datum_height = datum.height(); - const int datum_width = datum.width(); + const Datum *datum = &datum_in; + Datum resized_datum; + if (param_.has_random_resize_param()) { +#ifdef USE_OPENCV + RandomResizeImage(datum_in, &resized_datum); + datum = &resized_datum; +#else + LOG(FATAL) << "Random image resizing requires OpenCV; compile with USE_OPENCV."; +#endif + } + const string& data = datum->data(); + const int datum_channels = datum->channels(); + const int datum_height = datum->height(); + const int datum_width = datum->width(); const int crop_size = param_.crop_size(); const Dtype scale = param_.scale(); @@ -245,7 +255,7 @@ void DataTransformer::Transform(const Datum& datum, datum_element = static_cast(static_cast(data[data_index])); } else { - datum_element = datum.float_data(data_index); + datum_element = datum->float_data(data_index); } if (has_mean_file) { transformed_data[top_index] = @@ -756,10 +766,20 @@ void DataTransformer::Transform(const cv::Mat& cv_img, template template -void DataTransformer::Transform(const cv::Mat& cv_img, +void DataTransformer::Transform(const cv::Mat& cv_img_in, Blob* transformed_blob, NormalizedBBox* crop_bbox, RandNumbers& rand_num) { + const cv::Mat *cv_img = &cv_img_in; + cv::Mat resized_img; + if (param_.has_random_resize_param()) { +#ifdef USE_OPENCV + RandomResizeImage(cv_img_in, &resized_img); + cv_img = &resized_img; +#else + LOG(FATAL) << "Random image resizing requires OpenCV; compile with USE_OPENCV."; +#endif + } const int crop_size = param_.crop_size(); - const int img_channels = cv_img.channels(); + const int img_channels = cv_img->channels(); // Check dimensions. 
const int channels = transformed_blob->channels(); @@ -770,7 +790,7 @@ void DataTransformer::Transform(const cv::Mat& cv_img, CHECK_EQ(channels, img_channels); CHECK_GE(num, 1); - CHECK(cv_img.depth() == CV_8U) << "Image data type must be unsigned byte"; + CHECK(cv_img->depth() == CV_8U) << "Image data type must be unsigned byte"; const Dtype scale = param_.scale(); @@ -793,9 +813,9 @@ void DataTransformer::Transform(const cv::Mat& cv_img, } cv::Mat cv_resized_img, cv_noised_img; if (param_.has_resize_param()) { - cv_resized_img = ApplyResize(cv_img, param_.resize_param()); + cv_resized_img = ApplyResize(*cv_img, param_.resize_param()); } else { - cv_resized_img = cv_img; + cv_resized_img = *cv_img; } if (param_.has_noise_param()) { cv_noised_img = ApplyNoise(cv_resized_img, param_.noise_param()); @@ -809,7 +829,7 @@ void DataTransformer::Transform(const cv::Mat& cv_img, int h_off = 0; int w_off = 0; - cv::Mat cv_cropped_img = cv_img; + cv::Mat cv_cropped_img = *cv_img; if (crop_size) { CHECK_EQ(crop_size, height); CHECK_EQ(crop_size, width); @@ -822,7 +842,7 @@ void DataTransformer::Transform(const cv::Mat& cv_img, w_off = (img_width - crop_size) / 2; } cv::Rect roi(w_off, h_off, crop_size, crop_size); - cv_cropped_img = cv_img(roi); + cv_cropped_img = (*cv_img)(roi); } else { cv_cropped_img = cv_noised_img; } @@ -1035,6 +1055,34 @@ void DataTransformer::ExpandImage(const cv::Mat& img, img.copyTo((*expand_img)(bbox_roi)); } +template +void DataTransformer::RandomResizeImage(const Datum& datum, Datum *resized_datum) { + cv::Mat img = DecodeDatumToCVMatNative(datum); + cv::Mat resized_img; + RandomResizeImage(img, &resized_img); + CVMatToDatum(resized_img, resized_datum); +} + +template +void DataTransformer::RandomResizeImage(const cv::Mat& img, cv::Mat *resized_img) { + int h = img.size().height; + int w = img.size().width; + int min_size = param_.random_resize_param().min_size(); + int max_size = param_.random_resize_param().max_size(); + ResizeParameter resize_param = param_.random_resize_param().resize_param(); + if (min_size == 0) min_size = std::min(h,w); + if (max_size == 0) max_size = std::max(h,w); + int shorter_size = rand_num_(max_size - min_size + 1) + min_size; + resize_param.set_height(shorter_size); + resize_param.set_width(shorter_size); + if (h < w) { + resize_param.set_width(int(float(w) / h * shorter_size)); + } else { + resize_param.set_height(int(float(h) / w * shorter_size)); + } + *resized_img = ApplyResize(img, resize_param); +} + #endif // USE_OPENCV template diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 20c32507f..3bf537607 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -645,6 +645,14 @@ message TransformationParameter { optional ExpansionParameter expand_param = 14; // Constraint for emitting the annotation after transformation. 
optional EmitConstraint emit_constraint = 10; + // Resize the input randomly + optional RandomResizeParameter random_resize_param = 15; +} + +message RandomResizeParameter { + optional uint32 min_size = 1 [default = 0]; + optional uint32 max_size = 2 [default = 0]; + optional ResizeParameter resize_param = 3; } // Message that stores parameters used by data transformer for resize policy From f257968c05f08a2ac4d55d9935c597fc667ccb6c Mon Sep 17 00:00:00 2001 From: "Gong, Jiong" Date: Tue, 18 Jul 2017 03:03:38 +0800 Subject: [PATCH 41/54] handle raw datum --- src/caffe/data_transformer.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/caffe/data_transformer.cpp b/src/caffe/data_transformer.cpp index 5a1ae5511..5185308e8 100644 --- a/src/caffe/data_transformer.cpp +++ b/src/caffe/data_transformer.cpp @@ -1057,9 +1057,17 @@ void DataTransformer::ExpandImage(const cv::Mat& img, template void DataTransformer::RandomResizeImage(const Datum& datum, Datum *resized_datum) { - cv::Mat img = DecodeDatumToCVMatNative(datum); + shared_ptr img; + if (datum.encoded()) { + img = shared_ptr(new cv::Mat(DecodeDatumToCVMatNative(datum))); + } else { + img = shared_ptr(new cv::Mat( + cv::Size(datum.width(), datum.height()), + CV_8UC(datum.channels()), + (void*)datum.data().data())); + } cv::Mat resized_img; - RandomResizeImage(img, &resized_img); + RandomResizeImage(*img, &resized_img); CVMatToDatum(resized_img, resized_datum); } From 9cedc800f50b83cdaf12ed3e6026887cd46bf2c1 Mon Sep 17 00:00:00 2001 From: Zhang Date: Tue, 18 Jul 2017 13:29:16 +0800 Subject: [PATCH 42/54] Enable the MKLDNN parallel build for non-cmake building --- Makefile.mkldnn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.mkldnn b/Makefile.mkldnn index 51f7fcab6..ec1a70bc5 100644 --- a/Makefile.mkldnn +++ b/Makefile.mkldnn @@ -32,7 +32,7 @@ mkldnn_download: mkldnn_build: mkldnn_download cmake $(MKLDNN_CMAKE_FLAGS) - make -C $(CAFFE_ROOTDIR)/$(MKLDNN_BUILDDIR) + make -C $(CAFFE_ROOTDIR)/$(MKLDNN_BUILDDIR) -j$(shell cat /proc/cpuinfo |grep 'processor'|wc -l) make -C $(CAFFE_ROOTDIR)/$(MKLDNN_BUILDDIR) install else mkldnn_download: From f6e222d5e30fa3439db2d713be92c87fd9e51abc Mon Sep 17 00:00:00 2001 From: "Gong, Jiong" Date: Tue, 11 Jul 2017 14:27:10 +0800 Subject: [PATCH 43/54] make sure the order of initialization and destruction of MPI-related global variables within the scope of MPI lifecycle --- src/caffe/multinode/mlsl.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/caffe/multinode/mlsl.cpp b/src/caffe/multinode/mlsl.cpp index 31ffef7ee..8a0f772af 100644 --- a/src/caffe/multinode/mlsl.cpp +++ b/src/caffe/multinode/mlsl.cpp @@ -44,15 +44,18 @@ namespace caffe { namespace mn { boost::mutex distrib_lock; - std::map, boost::shared_ptr> distrib_map; + std::map, boost::shared_ptr> *distrib_map; void init(int* argc, char **argv[]) { static class initialize { public: initialize(int* argc, char** argv[]) { MLSL::Environment::GetEnv().Init(argc, argv); + distrib_map = + new std::map, boost::shared_ptr>(); } ~initialize() { + delete distrib_map; MLSL::Environment::GetEnv().Finalize(); } } __init{ argc, argv }; @@ -69,15 +72,15 @@ namespace caffe { Distribution * get_distrib(int dataParts, int modelParts) { boost::mutex::scoped_lock l(distrib_lock); std::pair key = std::make_pair(dataParts, modelParts); - if (distrib_map.find(key) == distrib_map.end()) { + if (distrib_map->find(key) == distrib_map->end()) { int node_id = get_node_id(); int 
num_nodes = get_nodes_count(); int modelColor = node_id / modelParts; int dataColor = node_id % (num_nodes / dataParts); - distrib_map[key] = boost::shared_ptr( + (*distrib_map)[key] = boost::shared_ptr( new Distribution(dataParts, modelParts, dataColor, modelColor)); } - return distrib_map[key].get(); + return (*distrib_map)[key].get(); } Distribution * get_distrib() { From 3aa2184bb2eb866e12a00dee624e4e58c2372213 Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Tue, 18 Jul 2017 12:18:39 -0400 Subject: [PATCH 44/54] Fix the prv memory primitive descriptor initialization in the Pooling layers. --- src/caffe/layers/mkldnn_pooling_layer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/layers/mkldnn_pooling_layer.cpp b/src/caffe/layers/mkldnn_pooling_layer.cpp index f5feb50b0..4a54a2efc 100644 --- a/src/caffe/layers/mkldnn_pooling_layer.cpp +++ b/src/caffe/layers/mkldnn_pooling_layer.cpp @@ -425,7 +425,7 @@ void MKLDNNPoolingLayer::InitPoolingBwd(const vector*>& top // ---- Initialize remaining memory descriptors ------------- shared_ptr prv_bwd_bottom_diff_mpd, prv_bwd_top_diff_mpd; - if (top_diff_is_prv) { + if (top_diff_is_prv || bottom_data_is_prv) { prv_bwd_bottom_diff_mpd.reset(new MemPD(*init_bwd_bottom_md, engine)); prv_bwd_top_diff_mpd.reset(new MemPD(*init_bwd_top_md, engine)); } From 1a1505e006396a6bed314bb72fb221918b061527 Mon Sep 17 00:00:00 2001 From: "Gong, Jiong" Date: Wed, 19 Jul 2017 01:05:21 +0800 Subject: [PATCH 45/54] accumulate diff for scale+shift in mkl batch norm --- src/caffe/layers/mkl_batch_norm_layer.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/caffe/layers/mkl_batch_norm_layer.cpp b/src/caffe/layers/mkl_batch_norm_layer.cpp index 8a1e44ab8..6dce50243 100755 --- a/src/caffe/layers/mkl_batch_norm_layer.cpp +++ b/src/caffe/layers/mkl_batch_norm_layer.cpp @@ -467,11 +467,12 @@ void MKLBatchNormLayer::Backward_cpu( CHECK_EQ(e, E_SUCCESS); if (use_weight_bias_) { - caffe_cpu_copy(this->blobs_[3]->count(), - diffScaleShift_buffer_, this->blobs_[3]->mutable_cpu_diff()); + caffe_cpu_axpby(this->blobs_[3]->count(), (Dtype)1., + diffScaleShift_buffer_, (Dtype)1., this->blobs_[3]->mutable_cpu_diff()); if (bias_term_) - caffe_cpu_copy(this->blobs_[4]->count(), - diffScaleShift_buffer_ + channels_, this->blobs_[4]->mutable_cpu_diff()); + caffe_cpu_axpby(this->blobs_[4]->count(), (Dtype)1., + diffScaleShift_buffer_ + channels_, + (Dtype)1., this->blobs_[4]->mutable_cpu_diff()); else caffe_set(this->blobs_[4]->count(), static_cast(0), this->blobs_[4]->mutable_cpu_diff()); From cc884b0930443db667a5d94f6eb48bee2b54ce1b Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Tue, 18 Jul 2017 16:17:27 -0400 Subject: [PATCH 46/54] Fix the workaround of wrong passed primitive and memory in the MKLDNN relu layer. --- src/caffe/layers/mkldnn_relu_layer.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/caffe/layers/mkldnn_relu_layer.cpp b/src/caffe/layers/mkldnn_relu_layer.cpp index 9626fdc25..6e0f93b67 100644 --- a/src/caffe/layers/mkldnn_relu_layer.cpp +++ b/src/caffe/layers/mkldnn_relu_layer.cpp @@ -284,14 +284,13 @@ void MKLDNNReLULayer::InitReLUBwd(const vector*>& top bwd_bottom_diff_memory = bwd_bottom_diff->create_output_memory(inplace); reluBwd.reset(new relu_backward(*reluBwd_pd, *fwd_bottom_data_primitive, *bwd_top_diff_primitive, *bwd_bottom_diff_memory)); - //TODO: the transfer fix will lead AlexNet not converge. The root cause is the "inplace". 
- bwd_top_diff->set_mkldnn_primitive(reluBwd); //Wrong passed primitive! (TODO: Checking!) - //MKLDNNPrimitive bwd_top_diff_primitive_transfer(bwd_top_diff_primitive); - //bwd_top_diff->set_mkldnn_primitive(bwd_top_diff_primitive_transfer); - - bwd_bottom_diff->set_mkldnn_primitive(reluBwd); //Wrong passed primitive! (TODO: Checking!) - //MKLDNNPrimitive bwd_bottom_diff_memory_transfer(bwd_bottom_diff_memory); - //bwd_bottom_diff->set_mkldnn_primitive(bwd_bottom_diff_memory_transfer); + //bwd_top_diff->set_mkldnn_primitive(reluBwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwd_top_diff_primitive_transfer(bwd_top_diff_primitive); + bwd_top_diff->set_mkldnn_primitive(bwd_top_diff_primitive_transfer); + + //bwd_bottom_diff->set_mkldnn_primitive(reluBwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwd_bottom_diff_memory_transfer(bwd_bottom_diff_memory); + bwd_bottom_diff->set_mkldnn_primitive(bwd_bottom_diff_memory_transfer); } template From 9fbb62130f93ab201cb888258013a5e4b0f0fc57 Mon Sep 17 00:00:00 2001 From: "Shen, Haihao" Date: Wed, 19 Jul 2017 10:29:35 +0800 Subject: [PATCH 47/54] Support input type for model tuning --- examples/pycaffe/tune_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/pycaffe/tune_model.py b/examples/pycaffe/tune_model.py index d9350d69e..bb9e4bfdd 100644 --- a/examples/pycaffe/tune_model.py +++ b/examples/pycaffe/tune_model.py @@ -57,7 +57,7 @@ def tuneModelDefinition(model): if l.bottom[0] in output_layer_map.keys(): ic = output_layer_map[l.bottom[0]][4] output_layer_map[l.name] = (index, 0, 0, ic, oc, False) - elif l.type.endswith("Data"): + elif l.type.endswith("Data") or l.type.endswith("Input"): # TODO: correct the output # dynamic_net = caffe.Net(model, caffe.TEST) # for k, v in dynamic_net.blobs.items(): From c46092c826d10280533aeffffbd566cd782521e9 Mon Sep 17 00:00:00 2001 From: "Gong, Jiong" Date: Wed, 19 Jul 2017 17:41:32 +0800 Subject: [PATCH 48/54] param should be cleared before each step --- src/caffe/multinode/multi_solver.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/caffe/multinode/multi_solver.cpp b/src/caffe/multinode/multi_solver.cpp index 86e9b37ef..fda5f8156 100644 --- a/src/caffe/multinode/multi_solver.cpp +++ b/src/caffe/multinode/multi_solver.cpp @@ -61,8 +61,6 @@ Dtype MultiSolver::ForwardBackwardImpl(bool first, bool last) { std::vector& update_time_per_layer = root_solver_->update_time_per_layer; #endif /* CAFFE_PER_LAYER_TIMINGS */ - net.ClearParamDiffs(); - for (int i = 0; i < layers.size(); ++i) { #ifdef CAFFE_PER_LAYER_TIMINGS timer.Start(); @@ -128,6 +126,7 @@ Dtype MultiSolver::ForwardBackwardImpl(bool first, bool last) { template Dtype MultiSolver::ForwardBackward() { Dtype loss = 0; + root_solver_->net()->ClearParamDiffs(); for (int i = 0; i < iter_size; ++i) { loss += ForwardBackwardImpl( (i == 0), (i + 1 == iter_size)); From 8a80b3a966e4683e77c0ee4ec782ac560ccf0989 Mon Sep 17 00:00:00 2001 From: "Gong, Jiong" Date: Thu, 20 Jul 2017 07:03:17 +0800 Subject: [PATCH 49/54] fix the random seed bcast bug --- src/caffe/data_reader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/data_reader.cpp b/src/caffe/data_reader.cpp index c78ff39c4..69e8f20f5 100644 --- a/src/caffe/data_reader.cpp +++ b/src/caffe/data_reader.cpp @@ -194,7 +194,7 @@ DataReader::DBShuffle::DBShuffle(const LayerParameter& param):DBWrapper(param) { mn::Distribution * distrib = mn::get_distrib(); float fetch_seed; fetch_seed = 
static_cast(caffe_rng_rand() % 15); - distrib->bcast(&fetch_seed, sizeof(fetch_seed)); + distrib->bcast(&fetch_seed, 1); LOG(INFO) << "Random seed for shuffling: " << fetch_seed; prefetch_rng_.reset(new Caffe::RNG(static_cast(fetch_seed))); #else From 84b52caf6860ef2df334f475bfa38e3d738f5cad Mon Sep 17 00:00:00 2001 From: "Gong, Jiong" Date: Thu, 20 Jul 2017 20:09:09 +0800 Subject: [PATCH 50/54] allocate mkldnn memory with mlsl api in multinode mode --- include/caffe/mkldnn_memory.hpp | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/include/caffe/mkldnn_memory.hpp b/include/caffe/mkldnn_memory.hpp index 706dd3973..a59ce6e12 100644 --- a/include/caffe/mkldnn_memory.hpp +++ b/include/caffe/mkldnn_memory.hpp @@ -112,9 +112,21 @@ class MKLDNNMemoryDescriptorBase : public PrvMemDescr void allocate() { if (_prv_memory == NULL) { +#ifdef USE_MLSL + if (mn::is_multinode()) { + auto mlsl_free = [](char* p) { mn::free((void*)p); }; + _mlsl_memory.reset( + (char*)mn::alloc(_prv_memory_pd->get_size(), 64), mlsl_free); + _prv_memory = shared_ptr( + new memory(*_prv_memory_pd, (void*)_mlsl_memory.get())); + } else { +#endif _prv_memory = shared_ptr(new memory(*_prv_memory_pd)); - _internal_ptr = (Dtype *)(_prv_memory->get_data_handle()); - // TODO: may need initialize memory by 0 +#ifdef USE_MLSL + } +#endif + _internal_ptr = (Dtype *)(_prv_memory->get_data_handle()); + // TODO: may need initialize memory by 0 } } void set_prv_memory_pd(shared_ptr memory_pd) { @@ -156,6 +168,9 @@ class MKLDNNMemoryDescriptorBase : public PrvMemDescr MKLDNNLayer* _mkldnn_layer; Blob* _blob; +#ifdef USE_MLSL + shared_ptr _mlsl_memory; +#endif }; template From 11b47304640c98cf593178f1ebaa0d696e1925f6 Mon Sep 17 00:00:00 2001 From: "Yu, Chong" Date: Fri, 21 Jul 2017 15:42:52 +0800 Subject: [PATCH 51/54] Update the MKLDNN version to 171572a205.. 
--- mkldnn.commit | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkldnn.commit b/mkldnn.commit index e5352e307..7eb0167ed 100644 --- a/mkldnn.commit +++ b/mkldnn.commit @@ -1 +1 @@ -264ad6619810c196971f8cd46a9cbcd480979a48 +171572a205c71f5bbb08657de5660c9d06cf2d8f From 80f1595be9b9cb2c6b88df55e4c96f4418e2d87d Mon Sep 17 00:00:00 2001 From: fzou1 Date: Fri, 21 Jul 2017 10:23:23 +0800 Subject: [PATCH 52/54] Implement optimization of overlapping wait communication with forward; Support time measurement of staring and waiting communication Change-Id: I6c5af8d85fbb8a81353142752a37dfce8cd1870d --- Makefile | 6 +- cmake/Dependencies.cmake | 11 ++ include/caffe/multinode/multi_solver.hpp | 17 ++- include/caffe/multinode/multi_sync.hpp | 50 +++++++- include/caffe/solver.hpp | 8 ++ src/caffe/multinode/multi_solver.cpp | 150 ++++++++++++++++++----- src/caffe/multinode/multi_sync.cpp | 7 ++ src/caffe/solver.cpp | 67 +++++++++- 8 files changed, 279 insertions(+), 37 deletions(-) diff --git a/Makefile b/Makefile index 4d9eada0f..e2fae373d 100644 --- a/Makefile +++ b/Makefile @@ -77,11 +77,13 @@ ifeq ($(CAFFE_PER_LAYER_TIMINGS), 1) endif ifeq ($(CAFFE_MLSL_SHUFFLE), 1) - COMMON_FLAGS += -DCAFFE_MLSL_SHUFFLE + COMMON_FLAGS += -DCAFFE_MLSL_SHUFFLE endif +ifneq ($(FW_OVERLAP_OPT), 0) + COMMON_FLAGS += -DFW_OVERLAP_OPT +endif endif - #################### MLSL #################### diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 0d27a46f4..b8c5577c6 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -115,6 +115,17 @@ if(USE_MLSL) include_directories(SYSTEM "${MLSL_ROOT}/intel64/include") link_directories(SYSTEM "${MLSL_ROOT}/intel64/lib") list(APPEND Caffe_LINKER_LIBS mlsl) + + if(CAFFE_PER_LAYER_TIMINGS) + add_definitions("-DCAFFE_PER_LAYER_TIMINGS") + endif() + if(CAFFE_MLSL_SHUFFLE) + add_definitions("-DCAFFE_MLSL_SHUFFLE") + endif() + if(FW_OVERLAP_OPT OR NOT DEFINED FW_OVERLAP_OPT) + message(STATUS "Forward overlapping optimization is enabled!") + add_definitions("-DFW_OVERLAP_OPT") + endif() endif() # ---[ BLAS diff --git a/include/caffe/multinode/multi_solver.hpp b/include/caffe/multinode/multi_solver.hpp index 41b92665a..1b5664d5f 100644 --- a/include/caffe/multinode/multi_solver.hpp +++ b/include/caffe/multinode/multi_solver.hpp @@ -60,6 +60,12 @@ class MultiSolver { iter_size(root_solver_->param().iter_size()) { root_solver_->set_forward_backward( boost::bind(&MultiSolver::ForwardBackward, this)); +#ifdef FW_OVERLAP_OPT + Net& net = *root_solver_->net(); + const std::vector>> & layers{ net.layers() }; + layer_finished_flags_.resize(layers.size()); + std::fill(layer_finished_flags_.begin(), layer_finished_flags_.end(), false); +#endif } @@ -99,14 +105,23 @@ class MultiSolver { boost::shared_ptr> root_solver() { return root_solver_; } - +#ifdef FW_OVERLAP_OPT + void set_layer_finished_flag(int layer_id, bool flag) { + layer_finished_flags_[layer_id] = flag; + } +#endif private: virtual Dtype ForwardBackwardImpl(bool first, bool last); + bool IsSkipWaitGradient(int layer_id); + void WaitAndUpdateGradient(int layer_id); protected: boost::shared_ptr> root_solver_; int iter_size; vector callbacks_; +#ifdef FW_OVERLAP_OPT + vector layer_finished_flags_; +#endif }; } // namespace caffe diff --git a/include/caffe/multinode/multi_sync.hpp b/include/caffe/multinode/multi_sync.hpp index c46509149..d08f7f13c 100644 --- a/include/caffe/multinode/multi_sync.hpp +++ b/include/caffe/multinode/multi_sync.hpp @@ -74,6 +74,10 @@ namespace caffe { shared_ptr> 
net; const vector *> &net_params; vector> layer_param_ids; +#ifdef FW_OVERLAP_OPT + vector> param_ids_finished_flags; +#endif + // layer_id -> blob_id -> cached blob to restore // statistics vector>>> cached_stats; @@ -160,6 +164,12 @@ namespace caffe { << " ENABLED" #else << " DISABLED" +#endif + << ", FORWARD OVERLAP OPTIMIZATION IS" +#ifdef FW_OVERLAP_OPT + << " ENABLED" +#else + << " DISABLED" #endif << ", SINGLE DB SPLITTING IS" #ifdef CAFFE_MLSL_SHUFFLE @@ -201,6 +211,12 @@ namespace caffe { return; } +#ifdef FW_OVERLAP_OPT + std::fill(param_ids_finished_flags[layer_id].begin(), + param_ids_finished_flags[layer_id].end(), + false); +#endif + std::vector ¶m_ids = layer_param_ids[layer_id]; for (int i = 0; i < param_ids.size(); ++i) { if (!layer->ParamNeedReduce(i)) continue; @@ -215,15 +231,41 @@ namespace caffe { void on_delwt_wait(int layer_id) { boost::shared_ptr> &layer = layers[layer_id]; if (layer->layerOp == nullptr) { +#ifdef FW_OVERLAP_OPT + solver->set_layer_finished_flag(layer_id, true); +#endif return; } std::vector ¶m_ids = layer_param_ids[layer_id]; +#ifdef FW_OVERLAP_OPT + int finished_count = 0; +#endif + for (int i=0; iParamNeedReduce(i)) continue; + if (!layer->ParamNeedReduce(i) +#ifdef FW_OVERLAP_OPT + || (param_ids_finished_flags[layer_id][i] == true)) { + finished_count++; +#else + ) { +#endif + continue; + } + +#ifdef FW_OVERLAP_OPT + bool is_completed = false; + Dtype *delwt_buf{(Dtype *) layer->layerOp->GetParameterSet(i)->TestGradientComm(&is_completed)}; +#else Dtype *delwt_buf{(Dtype *) layer->layerOp->GetParameterSet(i)->WaitGradientComm()}; +#endif if (delwt_buf) { +#ifdef FW_OVERLAP_OPT + assert(is_completed); + param_ids_finished_flags[layer_id][i] = true; + finished_count++; +#endif if (CAN_USE_PRV(net_params[param_ids[i]])) { if (delwt_buf != net_params[param_ids[i]]->prv_diff()) caffe_copy(net_params[param_ids[i]]->count(), @@ -235,6 +277,12 @@ namespace caffe { net_params[param_ids[i]]->mutable_cpu_diff()); } } + +#ifdef FW_OVERLAP_OPT + if (finished_count == param_ids.size()) { + solver->set_layer_finished_flag(layer_id, true); + } +#endif } void on_gradients_ready() { diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index 05413a6c9..9b97c3c0b 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -165,10 +165,18 @@ class Solver { std::vector forward_time_per_layer; std::vector backward_time_per_layer; std::vector update_time_per_layer; +#ifdef USE_MLSL + std::vector startcomm_time_per_layer; + std::vector waitcomm_time_per_layer; +#endif std::vector forward_time_per_layer_total; std::vector backward_time_per_layer_total; std::vector update_time_per_layer_total; +#ifdef USE_MLSL + std::vector startcomm_time_per_layer_total; + std::vector waitcomm_time_per_layer_total; +#endif void InitTimers(); void ResetTimers(); diff --git a/src/caffe/multinode/multi_solver.cpp b/src/caffe/multinode/multi_solver.cpp index fda5f8156..0516b27b5 100644 --- a/src/caffe/multinode/multi_solver.cpp +++ b/src/caffe/multinode/multi_solver.cpp @@ -46,6 +46,59 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace caffe { +#define START_ITER 1 + + +#ifdef CAFFE_PER_LAYER_TIMINGS +#define LAYER_TIMING_START() do { \ + timer.Start(); \ +}while(0) + +#define LAYER_TIMING_STOP(name, index) do { \ + name##_time_per_layer[index] += timer.MicroSeconds(); \ +}while(0) +#else +#define LAYER_TIMING_START() + +#define LAYER_TIMING_STOP(name,index) +#endif + +template +inline bool MultiSolver::IsSkipWaitGradient(int layer_id) { + Net& net = *root_solver_->net(); + const std::vector>>& layers{ net.layers() }; + const std::vector& layer_need_backward{ net.layer_need_backward() }; + + if (!layer_need_backward[layer_id] || ((layers[layer_id]->layerOp != nullptr) + && !layers[layer_id]->layerOp->HasParameterSets())) { + DLOG(INFO) << "ForwardBackwardImpl: no need for apply_updates for layer # " + << layer_id << ", skip on_delwt_wait, apply_updates, on_wtinc_ready"; + return true; + } + return false; +} + +template +inline void MultiSolver::WaitAndUpdateGradient(int layer_id) { + LAYER_TIMING_START(); + for (int j = 0; j < callbacks_.size(); ++j) { + callbacks_[j]->on_delwt_wait(layer_id); + } + LAYER_TIMING_STOP(waitcomm, layer_id); + +#ifdef FW_OVERLAP_OPT + if (layer_finished_flags_[layer_id]) { +#endif + LAYER_TIMING_START(); + for (int j = 0; j < callbacks_.size(); ++j) { + callbacks_[j]->apply_updates(layer_id); + } + LAYER_TIMING_STOP(update, layer_id); +#ifdef FW_OVERLAP_OPT + } +#endif +} + template Dtype MultiSolver::ForwardBackwardImpl(bool first, bool last) { @@ -53,70 +106,107 @@ Dtype MultiSolver::ForwardBackwardImpl(bool first, bool last) { Net& net = *root_solver_->net(); const std::vector>>& layers{ net.layers() }; const std::vector& layer_need_backward{ net.layer_need_backward() }; +#ifdef FW_OVERLAP_OPT + int iter = root_solver_->iter(); +#endif #ifdef CAFFE_PER_LAYER_TIMINGS Timer& timer = root_solver_->timer; std::vector& forward_time_per_layer = root_solver_->forward_time_per_layer; std::vector& backward_time_per_layer = root_solver_->backward_time_per_layer; std::vector& update_time_per_layer = root_solver_->update_time_per_layer; + std::vector& startcomm_time_per_layer = root_solver_->startcomm_time_per_layer; + std::vector& waitcomm_time_per_layer = root_solver_->waitcomm_time_per_layer; #endif /* CAFFE_PER_LAYER_TIMINGS */ + for (int i = 0; i < layers.size(); ++i) { -#ifdef CAFFE_PER_LAYER_TIMINGS - timer.Start(); +#ifdef FW_OVERLAP_OPT + if (first && iter >= START_ITER + 1) { + while (layer_finished_flags_[i] == false) { + if (IsSkipWaitGradient(i)) { + break; + } + + WaitAndUpdateGradient(i); + if (layer_finished_flags_[i]) { + break; + } + + for (int k=i+1; k= 0; --i) { -#ifdef CAFFE_PER_LAYER_TIMINGS - timer.Start(); -#endif - if (!layer_need_backward[i]) { continue; } + + LAYER_TIMING_START(); net.BackwardFromTo(i, i); + LAYER_TIMING_STOP(backward, i); + if (last && (layers[i]->layerOp != nullptr) && layers[i]->layerOp->HasParameterSets()) { + LAYER_TIMING_START(); for (int j = 0; j < callbacks_.size(); ++j) { - callbacks_[j]->on_iter_finished(i); + callbacks_[j]->on_iter_finished(i); } + LAYER_TIMING_STOP(startcomm, i); } - -#ifdef CAFFE_PER_LAYER_TIMINGS - backward_time_per_layer[i] += timer.MicroSeconds(); -#endif } +#ifdef FW_OVERLAP_OPT + int max_iter = root_solver_->param().max_iter(); + bool test = (root_solver_->param().test_interval() + && ((iter + 1) % root_solver_->param().test_interval() == 0)); + if (last && (test || (iter == max_iter - 1))) { + int finished_count = 0; + while (finished_count < layers.size()) { +#else if (last) { +#endif for (int i = 0; i < 
layers.size(); ++i) { -#ifdef CAFFE_PER_LAYER_TIMINGS - timer.Start(); +#ifdef FW_OVERLAP_OPT + if (layer_finished_flags_[i]) + continue; #endif - if (!layer_need_backward[i] || ((layers[i]->layerOp != nullptr) && !layers[i]->layerOp->HasParameterSets())) { - DLOG(INFO) << "ForwardBackwardImpl: no need for apply_updates for layer # " << i - << ", skip on_delwt_wait, apply_updates, on_wtinc_ready"; - continue; - } + if (IsSkipWaitGradient(i)) { +#ifdef FW_OVERLAP_OPT + finished_count++; + layer_finished_flags_[i] = true; +#endif + continue; + } - for (int j = 0; j < callbacks_.size(); ++j) { - callbacks_[j]->on_delwt_wait(i); - } + WaitAndUpdateGradient(i); - for (int j = 0; j < callbacks_.size(); ++j) { - callbacks_[j]->apply_updates(i); - } -#ifdef CAFFE_PER_LAYER_TIMINGS - update_time_per_layer[i] += timer.MicroSeconds(); +#ifdef FW_OVERLAP_OPT + if (layer_finished_flags_[i]) + finished_count++; #endif + } +#ifdef FW_OVERLAP_OPT } +#endif } DLOG(WARNING) << "iter " << root_solver_->iter() << ", loss " << loss; diff --git a/src/caffe/multinode/multi_sync.cpp b/src/caffe/multinode/multi_sync.cpp index eb6229ed4..448172c7b 100644 --- a/src/caffe/multinode/multi_sync.cpp +++ b/src/caffe/multinode/multi_sync.cpp @@ -53,12 +53,19 @@ MultiSync::MultiSync(shared_ptr > root_solver) root_solver->set_iter(1); layer_param_ids.resize(layers.size()); +#ifdef FW_OVERLAP_OPT + param_ids_finished_flags.resize(layers.size()); +#endif for (int layer_id = 0; layer_id < layers.size(); layer_id++) { shared_ptr > layer = layers[layer_id]; /* cache param ids */ layer_param_ids[layer_id] = net->get_layer_learnable_param_ids(layer_id); +#ifdef FW_OVERLAP_OPT + param_ids_finished_flags[layer_id].resize(layer_param_ids[layer_id].size()); + std::fill(param_ids_finished_flags[layer_id].begin(), param_ids_finished_flags[layer_id].end(), false); +#endif } } diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index cf8c31b47..3f17c5c58 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -394,10 +394,17 @@ void Solver::InitTimers() { this->forward_time_per_layer.resize(layer_count, 0.0); this->backward_time_per_layer.resize(layer_count, 0.0); this->update_time_per_layer.resize(layer_count, 0.0); - +#ifdef USE_MLSL + this->startcomm_time_per_layer.resize(layer_count, 0.0); + this->waitcomm_time_per_layer.resize(layer_count, 0.0); +#endif this->forward_time_per_layer_total.resize(layer_count, 0.0); this->backward_time_per_layer_total.resize(layer_count, 0.0); this->update_time_per_layer_total.resize(layer_count, 0.0); +#ifdef USE_MLSL + this->startcomm_time_per_layer_total.resize(layer_count, 0.0); + this->waitcomm_time_per_layer_total.resize(layer_count, 0.0); +#endif } template @@ -419,6 +426,19 @@ void Solver::ResetTimers() { this->update_time_per_layer.begin(), this->update_time_per_layer_total.begin(), std::plus()); +#ifdef USE_MLSL + std::transform(this->startcomm_time_per_layer_total.begin(), + this->startcomm_time_per_layer_total.end(), + this->startcomm_time_per_layer.begin(), + this->startcomm_time_per_layer_total.begin(), + std::plus()); + + std::transform(this->waitcomm_time_per_layer_total.begin(), + this->waitcomm_time_per_layer_total.end(), + this->waitcomm_time_per_layer.begin(), + this->waitcomm_time_per_layer_total.begin(), + std::plus()); +#endif std::fill(this->forward_time_per_layer.begin(), this->forward_time_per_layer.end(), 0); @@ -426,6 +446,12 @@ void Solver::ResetTimers() { this->backward_time_per_layer.end(), 0); std::fill(this->update_time_per_layer.begin(), 
this->update_time_per_layer.end(), 0); +#ifdef USE_MLSL + std::fill(this->startcomm_time_per_layer.begin(), + this->startcomm_time_per_layer.end(), 0); + std::fill(this->waitcomm_time_per_layer.begin(), + this->waitcomm_time_per_layer.end(), 0); +#endif } template @@ -444,7 +470,13 @@ void Solver::PrintTimers(bool printTotal) { backward_time_per_layer_total : backward_time_per_layer; std::vector& update_timers = printTotal ? update_time_per_layer_total : update_time_per_layer; +#ifdef USE_MLSL + std::vector& startcomm_timers = printTotal ? + startcomm_time_per_layer_total : startcomm_time_per_layer; + std::vector& waitcomm_timers = printTotal ? + waitcomm_time_per_layer_total : waitcomm_time_per_layer; std::string prefix = printTotal ? "TOTAL " : "DELTA "; +#endif double forward_time = std::accumulate(forward_timers.begin(), forward_timers.end(), 0) / 1000; @@ -479,8 +511,37 @@ void Solver::PrintTimers(bool printTotal) { } LOG(WARNING) << std::endl; - LOG(WARNING) << prefix << "TIME (F+B+U): " << (forward_time + - backward_time + update_time) / 1000 << " sec"; +#ifdef USE_MLSL + double startcomm_time = std::accumulate(startcomm_timers.begin(), + startcomm_timers.end(), 0) / 1000; + LOG(WARNING) << prefix << "START COMMUNICATION TIME: " << startcomm_time << " ms"; + for (int layer_idx = 0; layer_idx < net_->layers().size(); layer_idx++) { + LOG(WARNING) << "LAYER-" << layer_idx << " " + << net_->layers()[layer_idx]->type() + << ": startcomm_time: " << startcomm_timers[layer_idx] / 1000 + << " ms"; + } + LOG(WARNING) << std::endl; + + double waitcomm_time = std::accumulate(waitcomm_timers.begin(), + waitcomm_timers.end(), 0) / 1000; + LOG(WARNING) << prefix << "WAIT COMMUNICATION TIME: " << waitcomm_time << " ms"; + for (int layer_idx = 0; layer_idx < net_->layers().size(); layer_idx++) { + LOG(WARNING) << "LAYER-" << layer_idx << " " + << net_->layers()[layer_idx]->type() + << ": waitcomm_time: " << waitcomm_timers[layer_idx] / 1000 + << " ms"; + } + LOG(WARNING) << std::endl; + + LOG(WARNING) << prefix << "TIME (Computation + Communication): " << (forward_time + + backward_time + update_time + startcomm_time + waitcomm_time) / 1000 + << " sec"; +#else + LOG(WARNING) << prefix << "TIME (Computation): " << (forward_time + + backward_time + update_time) / 1000 << " sec"; +#endif + LOG(WARNING) << "####################################################"; LOG(WARNING) << std::endl; } From 65b29ef39b24b016f0c125e9827c65f097565da9 Mon Sep 17 00:00:00 2001 From: fzou1 Date: Sat, 22 Jul 2017 23:57:25 +0800 Subject: [PATCH 53/54] fix hang issue if resuming training and compilation issue as macro is called in another function than ForwardBackwardImpl --- include/caffe/multinode/multi_solver.hpp | 2 +- include/caffe/multinode/multi_sync.hpp | 20 +++++----- src/caffe/multinode/multi_solver.cpp | 50 ++++++------------------ 3 files changed, 23 insertions(+), 49 deletions(-) diff --git a/include/caffe/multinode/multi_solver.hpp b/include/caffe/multinode/multi_solver.hpp index 1b5664d5f..5d2082821 100644 --- a/include/caffe/multinode/multi_solver.hpp +++ b/include/caffe/multinode/multi_solver.hpp @@ -64,7 +64,7 @@ class MultiSolver { Net& net = *root_solver_->net(); const std::vector>> & layers{ net.layers() }; layer_finished_flags_.resize(layers.size()); - std::fill(layer_finished_flags_.begin(), layer_finished_flags_.end(), false); + std::fill(layer_finished_flags_.begin(), layer_finished_flags_.end(), true); #endif } diff --git a/include/caffe/multinode/multi_sync.hpp 
b/include/caffe/multinode/multi_sync.hpp index d08f7f13c..b979e89fe 100644 --- a/include/caffe/multinode/multi_sync.hpp +++ b/include/caffe/multinode/multi_sync.hpp @@ -182,15 +182,15 @@ namespace caffe { mn::train::commit(); #ifdef PERFORMANCE_MONITORING - statsIterResult.resize(caffe::mn::train::get_session().get_operation_count()); - caffe::mn::train::stats::start(); + statsIterResult.resize(caffe::mn::train::get_session().get_operation_count()); + caffe::mn::train::stats::start(); #endif solver->add_callback(this); solver->Solve(); #ifdef PERFORMANCE_MONITORING - dump_stats_to_file(); + dump_stats_to_file(); #endif } @@ -206,6 +206,10 @@ namespace caffe { } void on_iter_finished(int layer_id) { +#ifdef FW_OVERLAP_OPT + solver->set_layer_finished_flag(layer_id, false); +#endif + boost::shared_ptr> &layer = layers[layer_id]; if (layer->layerOp == nullptr) { return; @@ -238,16 +242,11 @@ namespace caffe { } std::vector ¶m_ids = layer_param_ids[layer_id]; - -#ifdef FW_OVERLAP_OPT - int finished_count = 0; -#endif - for (int i=0; iParamNeedReduce(i) #ifdef FW_OVERLAP_OPT || (param_ids_finished_flags[layer_id][i] == true)) { - finished_count++; + param_ids_finished_flags[layer_id][i] = true; #else ) { #endif @@ -264,7 +263,6 @@ namespace caffe { #ifdef FW_OVERLAP_OPT assert(is_completed); param_ids_finished_flags[layer_id][i] = true; - finished_count++; #endif if (CAN_USE_PRV(net_params[param_ids[i]])) { if (delwt_buf != net_params[param_ids[i]]->prv_diff()) @@ -279,6 +277,8 @@ namespace caffe { } #ifdef FW_OVERLAP_OPT + int finished_count = std::count(param_ids_finished_flags[layer_id].begin(), + param_ids_finished_flags[layer_id].end(), true); if (finished_count == param_ids.size()) { solver->set_layer_finished_flag(layer_id, true); } diff --git a/src/caffe/multinode/multi_solver.cpp b/src/caffe/multinode/multi_solver.cpp index 0516b27b5..13ad8da2b 100644 --- a/src/caffe/multinode/multi_solver.cpp +++ b/src/caffe/multinode/multi_solver.cpp @@ -46,16 +46,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace caffe { -#define START_ITER 1 - - #ifdef CAFFE_PER_LAYER_TIMINGS #define LAYER_TIMING_START() do { \ - timer.Start(); \ + root_solver_->timer.Start(); \ }while(0) #define LAYER_TIMING_STOP(name, index) do { \ - name##_time_per_layer[index] += timer.MicroSeconds(); \ + root_solver_->name##_time_per_layer[index] += root_solver_->timer.MicroSeconds(); \ }while(0) #else #define LAYER_TIMING_START() @@ -101,50 +98,29 @@ inline void MultiSolver::WaitAndUpdateGradient(int layer_id) { template Dtype MultiSolver::ForwardBackwardImpl(bool first, bool last) { - Dtype loss = 0; Net& net = *root_solver_->net(); const std::vector>>& layers{ net.layers() }; const std::vector& layer_need_backward{ net.layer_need_backward() }; -#ifdef FW_OVERLAP_OPT - int iter = root_solver_->iter(); -#endif - -#ifdef CAFFE_PER_LAYER_TIMINGS - Timer& timer = root_solver_->timer; - std::vector& forward_time_per_layer = root_solver_->forward_time_per_layer; - std::vector& backward_time_per_layer = root_solver_->backward_time_per_layer; - std::vector& update_time_per_layer = root_solver_->update_time_per_layer; - std::vector& startcomm_time_per_layer = root_solver_->startcomm_time_per_layer; - std::vector& waitcomm_time_per_layer = root_solver_->waitcomm_time_per_layer; -#endif /* CAFFE_PER_LAYER_TIMINGS */ - for (int i = 0; i < layers.size(); ++i) { #ifdef FW_OVERLAP_OPT - if (first && iter >= START_ITER + 1) { + if (first && IsSkipWaitGradient(i) == false) { while (layer_finished_flags_[i] == false) { - if (IsSkipWaitGradient(i)) { - break; - } - WaitAndUpdateGradient(i); - if (layer_finished_flags_[i]) { + if (layer_finished_flags_[i]) break; - } for (int k=i+1; k::ForwardBackwardImpl(bool first, bool last) { } LAYER_TIMING_START(); - net.BackwardFromTo(i, i); - LAYER_TIMING_STOP(backward, i); - if (last && (layers[i]->layerOp != nullptr) && layers[i]->layerOp->HasParameterSets()) { + if (last && (layers[i]->layerOp != nullptr) + && layers[i]->layerOp->HasParameterSets()) { LAYER_TIMING_START(); for (int j = 0; j < callbacks_.size(); ++j) { callbacks_[j]->on_iter_finished(i); @@ -174,6 +149,7 @@ Dtype MultiSolver::ForwardBackwardImpl(bool first, bool last) { } #ifdef FW_OVERLAP_OPT + int iter = root_solver_->iter(); int max_iter = root_solver_->param().max_iter(); bool test = (root_solver_->param().test_interval() && ((iter + 1) % root_solver_->param().test_interval() == 0)); @@ -183,12 +159,7 @@ Dtype MultiSolver::ForwardBackwardImpl(bool first, bool last) { #else if (last) { #endif - - for (int i = 0; i < layers.size(); ++i) { -#ifdef FW_OVERLAP_OPT - if (layer_finished_flags_[i]) - continue; -#endif + for (int i = 0; i < layers.size(); ++i) { if (IsSkipWaitGradient(i)) { #ifdef FW_OVERLAP_OPT finished_count++; @@ -196,9 +167,12 @@ Dtype MultiSolver::ForwardBackwardImpl(bool first, bool last) { #endif continue; } +#ifdef FW_OVERLAP_OPT + if (layer_finished_flags_[i]) + continue; +#endif WaitAndUpdateGradient(i); - #ifdef FW_OVERLAP_OPT if (layer_finished_flags_[i]) finished_count++; From 329b5548d15e8bfe1d8db2e1c16eb5f8d614e2a2 Mon Sep 17 00:00:00 2001 From: fzou1 Date: Mon, 24 Jul 2017 10:16:53 +0800 Subject: [PATCH 54/54] disable overlapping forward optimization by default --- Makefile | 2 +- cmake/Dependencies.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index e2fae373d..fd6e78bc8 100644 --- a/Makefile +++ b/Makefile @@ -80,7 +80,7 @@ ifeq ($(CAFFE_MLSL_SHUFFLE), 1) COMMON_FLAGS += -DCAFFE_MLSL_SHUFFLE endif -ifneq ($(FW_OVERLAP_OPT), 0) +ifeq 
($(FW_OVERLAP_OPT), 1) COMMON_FLAGS += -DFW_OVERLAP_OPT endif endif diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index b8c5577c6..67adf4ba7 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -122,7 +122,7 @@ if(USE_MLSL) if(CAFFE_MLSL_SHUFFLE) add_definitions("-DCAFFE_MLSL_SHUFFLE") endif() - if(FW_OVERLAP_OPT OR NOT DEFINED FW_OVERLAP_OPT) + if(FW_OVERLAP_OPT) message(STATUS "Forward overlapping optimization is enabled!") add_definitions("-DFW_OVERLAP_OPT") endif()
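
Usage note for PATCH 52-54: with the final patch applied, the forward-overlap optimization is compiled out unless it is requested explicitly, and it only takes effect in an MLSL-enabled (multinode) build. A minimal sketch of enabling it, assuming the Makefile and cmake/Dependencies.cmake changes above are in place:

    # Makefile build, in an MLSL-enabled configuration
    make -j FW_OVERLAP_OPT=1

    # CMake build (the flag is only evaluated when USE_MLSL is ON)
    cmake -DUSE_MLSL=ON -DFW_OVERLAP_OPT=ON <source-dir> && make -j

Both paths add -DFW_OVERLAP_OPT to the compile definitions, which gates the layer_finished_flags_ bookkeeping in MultiSolver and the per-parameter finished flags in MultiSync.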
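
Usage note for PATCH 40/41: random resizing is configured through the new random_resize_param block inside transform_param. A minimal prototxt sketch (the 256/480 range and the crop_size are illustrative values, not taken from these patches):

    transform_param {
      mirror: true
      crop_size: 224
      random_resize_param {
        min_size: 256
        max_size: 480
      }
    }

For each input the shorter side is resized to a value drawn uniformly from [min_size, max_size] with the aspect ratio preserved, before mirroring and cropping; leaving min_size or max_size at the default 0 falls back to the image's own shorter or longer side, respectively.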