diff --git a/Makefile b/Makefile
index 4d9eada0f..fd6e78bc8 100644
--- a/Makefile
+++ b/Makefile
@@ -77,11 +77,13 @@ ifeq ($(CAFFE_PER_LAYER_TIMINGS), 1)
 endif
 
 ifeq ($(CAFFE_MLSL_SHUFFLE), 1)
-    COMMON_FLAGS += -DCAFFE_MLSL_SHUFFLE
+	COMMON_FLAGS += -DCAFFE_MLSL_SHUFFLE
 endif
+ifeq ($(FW_OVERLAP_OPT), 1)
+	COMMON_FLAGS += -DFW_OVERLAP_OPT
+endif
 endif
-
 #################### MLSL ####################
diff --git a/Makefile.mkldnn b/Makefile.mkldnn
index 51f7fcab6..ec1a70bc5 100644
--- a/Makefile.mkldnn
+++ b/Makefile.mkldnn
@@ -32,7 +32,7 @@ mkldnn_download:
 
 mkldnn_build: mkldnn_download
 	cmake $(MKLDNN_CMAKE_FLAGS)
-	make -C $(CAFFE_ROOTDIR)/$(MKLDNN_BUILDDIR)
+	make -C $(CAFFE_ROOTDIR)/$(MKLDNN_BUILDDIR) -j$(shell cat /proc/cpuinfo |grep 'processor'|wc -l)
 	make -C $(CAFFE_ROOTDIR)/$(MKLDNN_BUILDDIR) install
 else
 mkldnn_download:
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 0d27a46f4..67adf4ba7 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -115,6 +115,17 @@ if(USE_MLSL)
   include_directories(SYSTEM "${MLSL_ROOT}/intel64/include")
   link_directories(SYSTEM "${MLSL_ROOT}/intel64/lib")
   list(APPEND Caffe_LINKER_LIBS mlsl)
+
+  if(CAFFE_PER_LAYER_TIMINGS)
+    add_definitions("-DCAFFE_PER_LAYER_TIMINGS")
+  endif()
+  if(CAFFE_MLSL_SHUFFLE)
+    add_definitions("-DCAFFE_MLSL_SHUFFLE")
+  endif()
+  if(FW_OVERLAP_OPT)
+    message(STATUS "Forward overlapping optimization is enabled!")
+    add_definitions("-DFW_OVERLAP_OPT")
+  endif()
 endif()
 
 # ---[ BLAS
diff --git a/cmake/MKLDNN.cmake b/cmake/MKLDNN.cmake
index 97000b7a5..43c51f7ee 100644
--- a/cmake/MKLDNN.cmake
+++ b/cmake/MKLDNN.cmake
@@ -8,7 +8,14 @@ function(Download_MKLDNN)
   execute_process(COMMAND cat mkldnn.commit
                   WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
                   OUTPUT_VARIABLE MKLDNN_COMMIT)
-
+
+  include(ProcessorCount)
+  ProcessorCount(NCORE)
+  if(NOT NCORE EQUAL 0)
+    set(CTEST_BUILD_FLAGS -j${NCORE})
+    set(ctest_test_args ${ctest_test_args} PARALLEL_LEVEL ${NCORE})
+  endif()
+
   ExternalProject_add(MKLDNN_Build
                       SOURCE_DIR ${MKLDNN_SOURCE_DIR}
                       CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
                                  -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} -DMKLROOT=${MKL_ROOT_DIR}
@@ -20,7 +27,7 @@ function(Download_MKLDNN)
                       BUILD_COMMAND cmake ${MKLDNN_SOURCE_DIR}
                       #--Install step
                       INSTALL_DIR ${MKLDNN_INSTALL_DIR}
-                      INSTALL_COMMAND make install
+                      INSTALL_COMMAND make install -j${NCORE}
                       LOG_CONFIGURE 1
                       LOG_BUILD 1
                       LOG_INSTALL 1
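Both build files above parallelize the MKL-DNN build over all logical processors, once by grepping /proc/cpuinfo in the Makefile and once via CMake's ProcessorCount module. A minimal Python sketch of the same core-count logic (illustrative only, not part of the patch):

    import multiprocessing

    # Same result on Linux as `cat /proc/cpuinfo | grep 'processor' | wc -l`
    # or CMake's ProcessorCount(NCORE): the number of logical CPUs.
    ncore = multiprocessing.cpu_count()
    print("make -j%d" % ncore)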
diff --git a/examples/LRCN_activity_recognition/train_test_singleFrame_RGB.prototxt b/examples/LRCN_activity_recognition/train_test_singleFrame_RGB.prototxt
index 8663afe45..26e4ddc55 100644
--- a/examples/LRCN_activity_recognition/train_test_singleFrame_RGB.prototxt
+++ b/examples/LRCN_activity_recognition/train_test_singleFrame_RGB.prototxt
@@ -13,7 +13,6 @@ layer {
     mean_value: 103.939
     mean_value: 116.779
     mean_value: 123.68
-    flow: false
   }
   image_data_param {
     source: "ucf101_singleFrame_RGB_train_split1.txt"
@@ -38,7 +37,6 @@ layer {
     mean_value: 103.939
     mean_value: 116.779
     mean_value: 123.68
-    flow: false
   }
   image_data_param {
     source: "ucf101_singleFrame_RGB_test_split1.txt"
diff --git a/examples/pycaffe/tune_engine.py b/examples/pycaffe/tune_engine.py
new file mode 100755
index 000000000..850b94929
--- /dev/null
+++ b/examples/pycaffe/tune_engine.py
@@ -0,0 +1,190 @@
+import os
+import sys
+import copy
+import argparse
+
+from caffe.proto import caffe_pb2
+import google.protobuf.text_format as txtf
+
+def readFile(filePath):
+    lines = []
+    file = open(filePath, 'r')
+    for line in file.readlines():
+        lines.append(line)
+    file.close()
+
+    return lines
+
+def writeFile(filePath, lines):
+    file = open(filePath, 'w+')
+    file.write(lines)
+    file.close()
+
+def parseLog(log):
+    lines = readFile(log)
+    model_start = False
+    time_start = False
+    model_lines = []
+    time_lines = []
+    for line in lines:
+        trim_line = line.strip()
+        if trim_line.endswith("Initializing net from parameters:"):
+            model_start = True
+            continue
+        if model_start:
+            if trim_line.find("Creating layer") <> -1:
+                model_start = False
+                continue
+            model_lines.append(line)
+
+        if trim_line.endswith("Average time per layer:"):
+            time_start = True
+            continue
+        if time_start:
+            if trim_line.find("Average Forward pass") <> -1:
+                time_start = False
+                break
+            time_lines.append(line)
+
+    model_lines = model_lines[1:]
+    model_str = ""
+    for line in model_lines:
+        model_str = model_str + line
+
+    return (model_str, time_lines)
+
+def parseTimeLines(timeLines):
+    layer_map = {}
+    for line in timeLines:
+        trim_line = line.strip()
+        items = trim_line.split("\t")
+        layer_items = items[0].split(" ")
+        layer_name = layer_items[-1]
+        time_items = items[1].split(" ")
+        if layer_name not in layer_map.keys():
+            layer_map[layer_name] = (float)(time_items[1])
+        else:
+            layer_map[layer_name] = layer_map[layer_name] + (float)(time_items[1])
+
+    return layer_map
+
+def parseModelStr(modelStr):
+    net = caffe_pb2.NetParameter()
+    txtf.Merge(modelStr, net)
+    layer_model_map = {}
+    global_engine = "CAFFE"
+    if net.engine != "":
+        global_engine = net.engine
+    for index in range(0, len(net.layer)):
+        engine = global_engine
+        l = net.layer[index]
+        if l.engine != "":
+            engine = l.engine
+        param_engine = -1
+        if l.type == "Convolution" or l.type == "Deconvolution":
+            if l.convolution_param.engine != "":
+                param_engine = l.convolution_param.engine
+        elif l.type == "BatchNorm":
+            if l.batch_norm_param.engine != "":
+                param_engine = l.batch_norm_param.engine
+        elif l.type == "Concat":
+            if l.concat_param.engine != "":
+                param_engine = l.concat_param.engine
+        elif l.type == "Eltwise":
+            if l.eltwise_param.engine != "":
+                param_engine = l.eltwise_param.engine
+        elif l.type == "InnerProduct":
+            if l.inner_product_param.engine != "":
+                param_engine = l.inner_product_param.engine
+        elif l.type == "LRN":
+            if l.lrn_param.engine != "":
+                param_engine = l.lrn_param.engine
+        elif l.type == "Pooling":
+            if l.pooling_param.engine != "":
+                param_engine = l.pooling_param.engine
+        elif l.type == "ReLU":
+            if l.relu_param.engine != "":
+                param_engine = l.relu_param.engine
+
+        if param_engine == 0 or param_engine == 1:
+            engine = "CAFFE"
+        elif param_engine == 3:
+            engine = "MKL2017"
+        elif param_engine == 4:
+            engine = "MKLDNN"
+        layer_model_map[l.name] = (index, engine, l)
+
+    return (net, layer_model_map)
+
+def selectOptimalEngine(layers):
+    optimal_layer = None
+    min_time = sys.float_info.max
+    for layer in layers:
+        if layer[2] < min_time:
+            min_time = layer[2]
+            optimal_layer = layer
+
+    return optimal_layer
+
+def tuneEngine(logs, model):
+    if len(logs) <= 1:
+        print "[ERROR] Please specify two or more log files"
+        exit(1)
+
+    for log in logs:
+        if not os.path.exists(log):
+            print "[ERROR] Please specify valid log file:", log
+            exit(1)
+
+    layer_map = {}
+    net = None
+    for log in logs:
+        log_name = os.path.basename(log)
+        (model_str, time_lines) = parseLog(log)
+        (net, layer_model_map) = parseModelStr(model_str)
+        layer_time_map = parseTimeLines(time_lines)
+        for k, v in layer_model_map.items():
+            if k not in layer_map.keys():
+                layer_map[k] = [(v[0], v[1], layer_time_map[k], v[2])]
+            else:
+                layer_map_v = layer_map[k]
+                layer_map_v.append((v[0], v[1], layer_time_map[k], v[2]))
+                layer_map[k] = layer_map_v
+
+    optimal_layer_map = {}
+    for k, v in layer_map.items():
+        optimal_layer = selectOptimalEngine(v)
+        assert(optimal_layer != None)
+        optimal_layer_map[optimal_layer[0]] = optimal_layer[3]
+
+    genModel(net, model, optimal_layer_map)
+
+def genModel(net, model, optimal_layer_map):
+    net_str = ""
+    net_str += "name: \"" + net.name + "\"\n"
+    for index in range(0, len(net.layer)):
+        net_str += "layer {\n"
+        l = net.layer[index]
+        if l.type.endswith("Data"):
+            net_str += str(l) + "\n}\n"
+            continue
+        l = optimal_layer_map[index]
+        net_str += str(l) + "\n}\n"
+    with open(model, 'w') as f:
+        net = caffe_pb2.NetParameter()
+        txtf.Merge(net_str, net)
+        f.write(str(net))
+    print "[INFO] Complete model engine tuning:", model
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('-l', '--logs', nargs='+', help='require the caffe time logs', required=True)
+
+    parser.add_argument('-o', '--output', action='store', dest='output', default="",
+                        help='require the model output')
+
+    parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.0')
+
+    params = parser.parse_args()
+    tuneEngine(params.logs, params.output)
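tune_engine.py picks, per layer, the fastest engine observed across several `caffe time` runs of the same model (one log per candidate engine) and writes a mixed-engine prototxt. A minimal driver sketch, assuming pycaffe is importable and tune_engine.py is on the path; the log names are hypothetical:

    # Run `caffe time` once per engine first, capturing its output to a log.
    from tune_engine import tuneEngine

    tuneEngine(["ssd_mkl2017.log", "ssd_mkldnn.log"], "ssd_tuned.prototxt")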
diff --git a/examples/pycaffe/tune_model.py b/examples/pycaffe/tune_model.py
new file mode 100644
index 000000000..bb9e4bfdd
--- /dev/null
+++ b/examples/pycaffe/tune_model.py
@@ -0,0 +1,99 @@
+import os
+import datetime
+import copy
+import argparse
+
+from caffe.proto import caffe_pb2
+import google.protobuf.text_format as txtf
+import caffe
+
+def isWinogradApplicable(ic, oc, stride, kernel_size):
+    if ic % 16 != 0:
+        return False
+    if oc % 16 != 0:
+        return False
+    if stride != 1:
+        return False
+    if kernel_size != 3:
+        return False
+
+    return True
+
+def genHybridModel(net, winogradLayers, modelName):
+    newNet = copy.deepcopy(net)
+    newNetName = modelName.split(".")[0] + "_hybrid.prototxt"
+    for layer in winogradLayers:
+        newNet.layer[layer].convolution_param.conv_algorithm = "winograd"
+    with open(newNetName, 'w') as f:
+        f.write(str(newNet))
+    print "[INFO] Complete model tuning with Winograd:", newNetName
+
+def tuneModelDefinition(model):
+    net = caffe_pb2.NetParameter()
+    with open(model) as f:
+        s = f.read()
+        txtf.Merge(s, net)
+
+    net.name = 'Tuned model of ' + net.name
+    output_layer_map = {}
+    for index in range(0, len(net.layer)):
+        l = net.layer[index]
+        if l.type == ("Convolution"):
+            stride = 0
+            kernel_size = 0
+            if len(l.convolution_param.stride) == 0:
+                stride = 1
+            else:
+                stride = l.convolution_param.stride[0]
+            kernel_size = l.convolution_param.kernel_size[0]
+            ic = 0
+            if l.bottom[0] in output_layer_map.keys():
+                ic = output_layer_map[l.bottom[0]][4]
+            oc = l.convolution_param.num_output
+            output_layer_map[l.name] = (index, stride, kernel_size, ic, oc, True)
+        elif l.type == ("InnerProduct"):
+            oc = l.inner_product_param.num_output
+            ic = 0
+            if l.bottom[0] in output_layer_map.keys():
+                ic = output_layer_map[l.bottom[0]][4]
+            output_layer_map[l.name] = (index, 0, 0, ic, oc, False)
+        elif l.type.endswith("Data") or l.type.endswith("Input"):
+            # TODO: correct the output
+            # dynamic_net = caffe.Net(model, caffe.TEST)
+            # for k, v in dynamic_net.blobs.items():
+            #     dynamic_net_map[k] = v.data.shape
+            ic = oc = 3
+            output_layer_map[l.name] = (index, 0, 0, ic, oc, False)
+        else:
+            ic = 0
+            if l.bottom[0] in output_layer_map.keys():
+                ic = output_layer_map[l.bottom[0]][4]
+            oc = ic
+            output_layer_map[l.name] = (index, 0, 0, ic, oc, False)
+
+    winograd_convolutions = []
+    for k,v in output_layer_map.items():
+        if v[5] and isWinogradApplicable(v[3], v[4], v[1], v[2]):
+            winograd_convolutions.append(v[0])
+
+    if len(winograd_convolutions) > 0:
+        genHybridModel(net, winograd_convolutions, model)
+    else:
+        print "[INFO] No need to tune model with Winograd:", model
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('-m', '--model', action='store', dest='model', default="",
+                        help='require the model definition (prototxt)')
+
+    parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.0')
+
+    params = parser.parse_args()
+
+    model = params.model
+    if not os.path.exists(params.model):
+        print "[ERROR] Please specify the model definition file with -m"
+        exit(1)
+
+    tuneModelDefinition(model)
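tune_model.py flips a convolution to conv_algorithm "winograd" only when isWinogradApplicable() holds: both channel counts divisible by 16, stride 1, and a 3x3 kernel. A quick check with made-up layer shapes, assuming tune_model.py and pycaffe are importable:

    from tune_model import isWinogradApplicable

    # 3x3 kernel, stride 1, channels divisible by 16: eligible.
    print isWinogradApplicable(64, 128, 1, 3)    # True
    # Stride 2 disqualifies the layer.
    print isWinogradApplicable(64, 128, 2, 3)    # False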
diff --git a/include/caffe/data_reader.hpp b/include/caffe/data_reader.hpp
index c700586be..dff77199f 100644
--- a/include/caffe/data_reader.hpp
+++ b/include/caffe/data_reader.hpp
@@ -129,6 +129,7 @@ class DataReader {
 
   const LayerParameter param_;
   BlockingQueue<shared_ptr<QueuePair> > new_queue_pairs_;
+  bool first_read_;
 
   friend class DataReader;
diff --git a/include/caffe/data_transformer.hpp b/include/caffe/data_transformer.hpp
index d95df5a0c..f709a281b 100644
--- a/include/caffe/data_transformer.hpp
+++ b/include/caffe/data_transformer.hpp
@@ -396,6 +396,11 @@ class DataTransformer {
            bool has_mean_values>
   void Transform(const Datum& datum, Dtype* transformed_data,
                  NormalizedBBox* crop_bbox, RandNumbers& rand_num);
+
+#ifdef USE_OPENCV
+  void RandomResizeImage(const Datum& datum, Datum *resized_datum);
+  void RandomResizeImage(const cv::Mat& img, cv::Mat *resized_img);
+#endif
 };
 
 }  // namespace caffe
diff --git a/include/caffe/mkldnn_memory.hpp b/include/caffe/mkldnn_memory.hpp
index 706dd3973..a59ce6e12 100644
--- a/include/caffe/mkldnn_memory.hpp
+++ b/include/caffe/mkldnn_memory.hpp
@@ -112,9 +112,21 @@ class MKLDNNMemoryDescriptorBase : public PrvMemDescr
     void allocate() {
         if (_prv_memory == NULL) {
+#ifdef USE_MLSL
+            if (mn::is_multinode()) {
+                auto mlsl_free = [](char* p) { mn::free((void*)p); };
+                _mlsl_memory.reset(
+                    (char*)mn::alloc(_prv_memory_pd->get_size(), 64), mlsl_free);
+                _prv_memory = shared_ptr<memory>(
+                    new memory(*_prv_memory_pd, (void*)_mlsl_memory.get()));
+            } else {
+#endif
             _prv_memory = shared_ptr<memory>(new memory(*_prv_memory_pd));
-            _internal_ptr = (Dtype *)(_prv_memory->get_data_handle());
-            // TODO: may need initialize memory by 0
+#ifdef USE_MLSL
+            }
+#endif
+            _internal_ptr = (Dtype *)(_prv_memory->get_data_handle());
+            // TODO: may need initialize memory by 0
         }
     }
     void set_prv_memory_pd(shared_ptr<memory::primitive_desc> memory_pd) {
@@ -156,6 +168,9 @@ class MKLDNNMemoryDescriptorBase : public PrvMemDescr
 
     MKLDNNLayer<Dtype>* _mkldnn_layer;
     Blob<Dtype>* _blob;
+#ifdef USE_MLSL
+    shared_ptr<char> _mlsl_memory;
+#endif
 };
 
 template <typename Dtype, typename T>
diff --git a/include/caffe/multinode/mlsl.hpp b/include/caffe/multinode/mlsl.hpp
index b135e4673..b0d3d13d6 100644
--- a/include/caffe/multinode/mlsl.hpp
+++ b/include/caffe/multinode/mlsl.hpp
@@ -48,6 +48,8 @@ namespace caffe {
 
 #define MLSL_DEFAULT_COLOR -1
 
+    void init(int* argc, char** argv[]);
+
     inline void free(void *addr) {
       return MLSL::Environment::GetEnv().Free(addr);
     }
diff --git a/include/caffe/multinode/multi_solver.hpp b/include/caffe/multinode/multi_solver.hpp
index 41b92665a..5d2082821 100644
--- a/include/caffe/multinode/multi_solver.hpp
+++ b/include/caffe/multinode/multi_solver.hpp
@@ -60,6 +60,12 @@ class MultiSolver {
       iter_size(root_solver_->param().iter_size()) {
     root_solver_->set_forward_backward(
         boost::bind(&MultiSolver::ForwardBackward, this));
+#ifdef FW_OVERLAP_OPT
+    Net<Dtype>& net = *root_solver_->net();
+    const std::vector<boost::shared_ptr<Layer<Dtype>>>& layers{ net.layers() };
+    layer_finished_flags_.resize(layers.size());
+    std::fill(layer_finished_flags_.begin(), layer_finished_flags_.end(), true);
+#endif
   }
 
@@ -99,14 +105,23 @@ class MultiSolver {
   boost::shared_ptr<Solver<Dtype>> root_solver() {
     return root_solver_;
   }
-
+#ifdef FW_OVERLAP_OPT
+  void set_layer_finished_flag(int layer_id, bool flag) {
+    layer_finished_flags_[layer_id] = flag;
+  }
+#endif
 
  private:
   virtual Dtype ForwardBackwardImpl(bool first, bool last);
+  bool IsSkipWaitGradient(int layer_id);
+  void WaitAndUpdateGradient(int layer_id);
 
  protected:
   boost::shared_ptr<Solver<Dtype>> root_solver_;
   int iter_size;
   vector<Callback*> callbacks_;
+#ifdef FW_OVERLAP_OPT
+  vector<bool> layer_finished_flags_;
+#endif
 };
 
 }  // namespace caffe
diff --git a/include/caffe/multinode/multi_sync.hpp b/include/caffe/multinode/multi_sync.hpp
index 2d4c566ae..b979e89fe 100644
--- a/include/caffe/multinode/multi_sync.hpp
+++ b/include/caffe/multinode/multi_sync.hpp
@@ -63,7 +63,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 namespace caffe {
 
-#define CAN_USE_PRV(param) false //(param->prv_diff() && (param->prv_diff_count() == param->count()))
+#define CAN_USE_PRV(param) (param->prv_diff() && (param->prv_diff_count() == param->count()))
 
 template <typename Dtype>
 class MultiSync : public MultiSolver<Dtype>::Callback {
@@ -74,6 +74,10 @@ namespace caffe {
   shared_ptr<Net<Dtype>> net;
   const vector<Blob<Dtype> *> &net_params;
   vector<vector<int>> layer_param_ids;
+#ifdef FW_OVERLAP_OPT
+  vector<vector<bool>> param_ids_finished_flags;
+#endif
+
   // layer_id -> blob_id -> cached blob to restore
   // statistics
   vector<vector<shared_ptr<Blob<Dtype>>>> cached_stats;
@@ -160,6 +164,12 @@ namespace caffe {
             << " ENABLED"
 #else
             << " DISABLED"
+#endif
+            << ", FORWARD OVERLAP OPTIMIZATION IS"
+#ifdef FW_OVERLAP_OPT
+            << " ENABLED"
+#else
+            << " DISABLED"
 #endif
             << ", SINGLE DB SPLITTING IS"
 #ifdef CAFFE_MLSL_SHUFFLE
@@ -172,15 +182,15 @@ namespace caffe {
       mn::train::commit();
 
 #ifdef PERFORMANCE_MONITORING
-    statsIterResult.resize(caffe::mn::train::get_session().get_operation_count());
-    caffe::mn::train::stats::start();
+      statsIterResult.resize(caffe::mn::train::get_session().get_operation_count());
+      caffe::mn::train::stats::start();
 #endif
 
       solver->add_callback(this);
       solver->Solve();
 
 #ifdef PERFORMANCE_MONITORING
-    dump_stats_to_file();
+      dump_stats_to_file();
 #endif
     }
 
@@ -196,14 +206,24 @@ namespace caffe {
     }
 
     void on_iter_finished(int layer_id) {
+#ifdef FW_OVERLAP_OPT
+      solver->set_layer_finished_flag(layer_id, false);
+#endif
+
       boost::shared_ptr<Layer<Dtype>> &layer = layers[layer_id];
       if (layer->layerOp == nullptr) {
        return;
       }
 
+#ifdef FW_OVERLAP_OPT
+      std::fill(param_ids_finished_flags[layer_id].begin(),
+                param_ids_finished_flags[layer_id].end(),
+                false);
+#endif
+
       std::vector<int> &param_ids = layer_param_ids[layer_id];
       for (int i = 0; i < param_ids.size(); ++i) {
-        if (!layer->ParamNeedReduce(param_ids[i])) continue;
+        if (!layer->ParamNeedReduce(i)) continue;
         if (CAN_USE_PRV(net_params[param_ids[i]])) {
           layer->layerOp->GetParameterSet(i)->StartGradientComm((void *) net_params[param_ids[i]]->mutable_prv_diff());
         } else {
@@ -215,15 +235,35 @@ namespace caffe {
     void on_delwt_wait(int layer_id) {
       boost::shared_ptr<Layer<Dtype>> &layer = layers[layer_id];
       if (layer->layerOp == nullptr) {
+#ifdef FW_OVERLAP_OPT
+        solver->set_layer_finished_flag(layer_id, true);
+#endif
         return;
       }
 
       std::vector<int> &param_ids = layer_param_ids[layer_id];
-      for (int i=0; i<param_ids.size(); ++i) {
-        if (!layer->ParamNeedReduce(param_ids[i])) continue;
+      for (int i = 0; i < param_ids.size(); ++i) {
+        if (!layer->ParamNeedReduce(i)
+#ifdef FW_OVERLAP_OPT
+            || (param_ids_finished_flags[layer_id][i] == true)) {
+          param_ids_finished_flags[layer_id][i] = true;
+#else
+        ) {
+#endif
+          continue;
+        }
+
+#ifdef FW_OVERLAP_OPT
+        bool is_completed = false;
+        Dtype *delwt_buf{(Dtype *) layer->layerOp->GetParameterSet(i)->TestGradientComm(&is_completed)};
+#else
         Dtype *delwt_buf{(Dtype *) layer->layerOp->GetParameterSet(i)->WaitGradientComm()};
+#endif
        if (delwt_buf) {
+#ifdef FW_OVERLAP_OPT
+          assert(is_completed);
+          param_ids_finished_flags[layer_id][i] = true;
+#endif
          if (CAN_USE_PRV(net_params[param_ids[i]])) {
            if (delwt_buf != net_params[param_ids[i]]->prv_diff())
              caffe_copy(net_params[param_ids[i]]->count(),
@@ -235,6 +275,14 @@ namespace caffe {
                         net_params[param_ids[i]]->mutable_cpu_diff());
          }
        }
+
+#ifdef FW_OVERLAP_OPT
+        int finished_count = std::count(param_ids_finished_flags[layer_id].begin(),
+                                        param_ids_finished_flags[layer_id].end(), true);
+        if (finished_count == param_ids.size()) {
+          solver->set_layer_finished_flag(layer_id, true);
+        }
+#endif
      }
 
      void on_gradients_ready() {
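With FW_OVERLAP_OPT, on_delwt_wait() polls TestGradientComm() instead of blocking in WaitGradientComm(), records each finished parameter in param_ids_finished_flags, and flips the solver's per-layer flag once all of a layer's gradients have arrived, so the next forward pass of finished layers can overlap the communication still pending for the others. A rough Python sketch of that polling pattern (the names comm_test and start_forward are illustrative stand-ins, not the C++ API):

    # comm_test(layer_id, i) stands in for the non-blocking TestGradientComm;
    # start_forward(layer_id) stands in for kicking off that layer's next forward.
    def poll_gradients(layer_param_ids, comm_test, start_forward):
        finished = {lid: [False] * len(p) for lid, p in layer_param_ids.items()}
        done_layers = set()
        while len(done_layers) < len(layer_param_ids):
            for lid, param_ids in layer_param_ids.items():
                if lid in done_layers:
                    continue
                for i in range(len(param_ids)):
                    if not finished[lid][i] and comm_test(lid, i):
                        finished[lid][i] = True
                if all(finished[lid]):       # analogue of set_layer_finished_flag
                    done_layers.add(lid)
                    start_forward(lid)       # overlap forward with pending comms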
diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp
index 0dc63436c..ba47be986 100644
--- a/include/caffe/net.hpp
+++ b/include/caffe/net.hpp
@@ -304,7 +304,6 @@ class Net {
    * @brief If find "Conv--BN--Scale" in current network, merge BN and Scale layer into Convolution
    * layers, this optimization only works in caffe TEST phase now.
    */
-  static void RemoveBNScale(const NetParameter& param, NetParameter* param_compiled);
   static void GetBlobConsumers(std::vector<const LayerParameter*> &cnsmer_blobs,
                                const string& blob_name_to_find,
diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp
index 05413a6c9..9b97c3c0b 100644
--- a/include/caffe/solver.hpp
+++ b/include/caffe/solver.hpp
@@ -165,10 +165,18 @@ class Solver {
   std::vector<double> forward_time_per_layer;
   std::vector<double> backward_time_per_layer;
   std::vector<double> update_time_per_layer;
+#ifdef USE_MLSL
+  std::vector<double> startcomm_time_per_layer;
+  std::vector<double> waitcomm_time_per_layer;
+#endif
 
   std::vector<double> forward_time_per_layer_total;
   std::vector<double> backward_time_per_layer_total;
   std::vector<double> update_time_per_layer_total;
+#ifdef USE_MLSL
+  std::vector<double> startcomm_time_per_layer_total;
+  std::vector<double> waitcomm_time_per_layer_total;
+#endif
 
   void InitTimers();
   void ResetTimers();
diff --git a/include/caffe/util/remove_batch_norm.hpp b/include/caffe/util/remove_batch_norm.hpp
index c2e92f40f..316a4c022 100644
--- a/include/caffe/util/remove_batch_norm.hpp
+++ b/include/caffe/util/remove_batch_norm.hpp
@@ -69,5 +69,7 @@ void AdjustConvLayer(LayerParameter &conv_layer,
 template <typename Dtype>
 void RecoverBNScaleMergedNet(NetParameter * net_param, NetParameter* recovered_net_param);
 
+template <typename Dtype>
+void RemoveBNScale(const NetParameter& param, NetParameter* param_compiled);
 }
 #endif
diff --git a/mkldnn.commit b/mkldnn.commit
index 4e6af52a3..7eb0167ed 100644
--- a/mkldnn.commit
+++ b/mkldnn.commit
@@ -1 +1 @@
-22bf25f29369d247098968837b21f3d1bdb2336e
+171572a205c71f5bbb08657de5660c9d06cf2d8f
diff --git a/models/intel_optimized_models/ssd/VGGNet/VOC0712/SSD_300x300/deploy_mkl2017.prototxt b/models/intel_optimized_models/ssd/VGGNet/VOC0712/SSD_300x300/deploy_mkl2017.prototxt
new file mode 100644
index 000000000..7e2ddbbbb
--- /dev/null
+++ b/models/intel_optimized_models/ssd/VGGNet/VOC0712/SSD_300x300/deploy_mkl2017.prototxt
@@ -0,0 +1,1626 @@
+name: "VGG_VOC0712_SSD_300x300_deploy"
+input: "data"
+input_shape {
+  dim: 1
+  dim: 3
+  dim: 300
+  dim: 300
+}
+layer {
+  engine: "MKL2017"
+ 
name: "conv1_1" + type: "Convolution" + bottom: "data" + top: "conv1_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "relu1_1" + type: "ReLU" + bottom: "conv1_1" + top: "conv1_1" +} +layer { + engine: "MKL2017" + name: "conv1_2" + type: "Convolution" + bottom: "conv1_1" + top: "conv1_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "relu1_2" + type: "ReLU" + bottom: "conv1_2" + top: "conv1_2" +} +layer { + engine: "MKL2017" + name: "pool1" + type: "Pooling" + bottom: "conv1_2" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + engine: "MKL2017" + name: "conv2_1" + type: "Convolution" + bottom: "pool1" + top: "conv2_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "relu2_1" + type: "ReLU" + bottom: "conv2_1" + top: "conv2_1" +} +layer { + engine: "MKL2017" + name: "conv2_2" + type: "Convolution" + bottom: "conv2_1" + top: "conv2_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "relu2_2" + type: "ReLU" + bottom: "conv2_2" + top: "conv2_2" +} +layer { + engine: "MKL2017" + name: "pool2" + type: "Pooling" + bottom: "conv2_2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + engine: "MKL2017" + name: "conv3_1" + type: "Convolution" + bottom: "pool2" + top: "conv3_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "relu3_1" + type: "ReLU" + bottom: "conv3_1" + top: "conv3_1" +} +layer { + engine: "MKL2017" + name: "conv3_2" + type: "Convolution" + bottom: "conv3_1" + top: "conv3_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "relu3_2" + type: "ReLU" + bottom: "conv3_2" + top: "conv3_2" +} +layer { + engine: "MKL2017" + name: "conv3_3" + type: "Convolution" + bottom: "conv3_2" + top: "conv3_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "relu3_3" + type: "ReLU" + bottom: "conv3_3" + top: "conv3_3" +} +layer { + engine: "MKL2017" + name: "pool3" + type: "Pooling" + bottom: 
"conv3_3" + top: "pool3" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + engine: "MKL2017" + name: "conv4_1" + type: "Convolution" + bottom: "pool3" + top: "conv4_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "relu4_1" + type: "ReLU" + bottom: "conv4_1" + top: "conv4_1" +} +layer { + engine: "MKL2017" + name: "conv4_2" + type: "Convolution" + bottom: "conv4_1" + top: "conv4_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "relu4_2" + type: "ReLU" + bottom: "conv4_2" + top: "conv4_2" +} +layer { + engine: "MKL2017" + name: "conv4_3" + type: "Convolution" + bottom: "conv4_2" + top: "conv4_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "relu4_3" + type: "ReLU" + bottom: "conv4_3" + top: "conv4_3" +} +layer { + engine: "MKL2017" + name: "pool4" + type: "Pooling" + bottom: "conv4_3" + top: "pool4" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + engine: "MKL2017" + name: "conv5_1" + type: "Convolution" + bottom: "pool4" + top: "conv5_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + dilation: 1 + } +} +layer { + engine: "MKL2017" + name: "relu5_1" + type: "ReLU" + bottom: "conv5_1" + top: "conv5_1" +} +layer { + engine: "MKL2017" + name: "conv5_2" + type: "Convolution" + bottom: "conv5_1" + top: "conv5_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + dilation: 1 + } +} +layer { + engine: "MKL2017" + name: "relu5_2" + type: "ReLU" + bottom: "conv5_2" + top: "conv5_2" +} +layer { + engine: "MKL2017" + name: "conv5_3" + type: "Convolution" + bottom: "conv5_2" + top: "conv5_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + dilation: 1 + } +} +layer { + engine: "MKL2017" + name: "relu5_3" + type: "ReLU" + bottom: "conv5_3" + top: "conv5_3" +} +layer { + engine: "MKL2017" + name: "pool5" + type: "Pooling" + bottom: "conv5_3" + top: "pool5" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + engine: "MKL2017" + name: "fc6" + type: "Convolution" + bottom: "pool5" + top: "fc6" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 1024 + pad: 6 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + dilation: 6 
+ } +} +layer { + engine: "MKL2017" + name: "relu6" + type: "ReLU" + bottom: "fc6" + top: "fc6" +} +layer { + engine: "MKL2017" + name: "fc7" + type: "Convolution" + bottom: "fc6" + top: "fc7" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 1024 + kernel_size: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "relu7" + type: "ReLU" + bottom: "fc7" + top: "fc7" +} +layer { + engine: "MKL2017" + name: "conv6_1" + type: "Convolution" + bottom: "fc7" + top: "conv6_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "conv6_1_relu" + type: "ReLU" + bottom: "conv6_1" + top: "conv6_1" +} +layer { + engine: "MKL2017" + name: "conv6_2" + type: "Convolution" + bottom: "conv6_1" + top: "conv6_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "conv6_2_relu" + type: "ReLU" + bottom: "conv6_2" + top: "conv6_2" +} +layer { + engine: "MKL2017" + name: "conv7_1" + type: "Convolution" + bottom: "conv6_2" + top: "conv7_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "conv7_1_relu" + type: "ReLU" + bottom: "conv7_1" + top: "conv7_1" +} +layer { + engine: "MKL2017" + name: "conv7_2" + type: "Convolution" + bottom: "conv7_1" + top: "conv7_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "conv7_2_relu" + type: "ReLU" + bottom: "conv7_2" + top: "conv7_2" +} +layer { + engine: "MKL2017" + name: "conv8_1" + type: "Convolution" + bottom: "conv7_2" + top: "conv8_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "conv8_1_relu" + type: "ReLU" + bottom: "conv8_1" + top: "conv8_1" +} +layer { + engine: "MKL2017" + name: "conv8_2" + type: "Convolution" + bottom: "conv8_1" + top: "conv8_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 0 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "conv8_2_relu" + type: "ReLU" + bottom: "conv8_2" + top: "conv8_2" +} +layer { + engine: "MKL2017" + name: "conv9_1" + type: "Convolution" + bottom: "conv8_2" + top: "conv9_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + 
convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "conv9_1_relu" + type: "ReLU" + bottom: "conv9_1" + top: "conv9_1" +} +layer { + engine: "MKL2017" + name: "conv9_2" + type: "Convolution" + bottom: "conv9_1" + top: "conv9_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 0 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKL2017" + name: "conv9_2_relu" + type: "ReLU" + bottom: "conv9_2" + top: "conv9_2" +} +layer { + name: "conv4_3_norm" + type: "Normalize" + bottom: "conv4_3" + top: "conv4_3_norm" + norm_param { + across_spatial: false + scale_filler { + type: "constant" + value: 20 + } + channel_shared: false + } +} +layer { + name: "conv4_3_norm_mbox_loc" + type: "Convolution" + bottom: "conv4_3_norm" + top: "conv4_3_norm_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv4_3_norm_mbox_loc_perm" + type: "Permute" + bottom: "conv4_3_norm_mbox_loc" + top: "conv4_3_norm_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv4_3_norm_mbox_loc_flat" + type: "Flatten" + bottom: "conv4_3_norm_mbox_loc_perm" + top: "conv4_3_norm_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv4_3_norm_mbox_conf" + type: "Convolution" + bottom: "conv4_3_norm" + top: "conv4_3_norm_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 84 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv4_3_norm_mbox_conf_perm" + type: "Permute" + bottom: "conv4_3_norm_mbox_conf" + top: "conv4_3_norm_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv4_3_norm_mbox_conf_flat" + type: "Flatten" + bottom: "conv4_3_norm_mbox_conf_perm" + top: "conv4_3_norm_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv4_3_norm_mbox_priorbox" + type: "PriorBox" + bottom: "conv4_3_norm" + bottom: "data" + top: "conv4_3_norm_mbox_priorbox" + prior_box_param { + min_size: 30.0 + max_size: 60.0 + aspect_ratio: 2 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 8 + offset: 0.5 + } +} +layer { + engine: "MKL2017" + name: "fc7_mbox_loc" + type: "Convolution" + bottom: "fc7" + top: "fc7_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "fc7_mbox_loc_perm" + type: "Permute" + bottom: "fc7_mbox_loc" + top: "fc7_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "fc7_mbox_loc_flat" + type: "Flatten" + bottom: "fc7_mbox_loc_perm" + top: "fc7_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "fc7_mbox_conf" + 
type: "Convolution" + bottom: "fc7" + top: "fc7_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 126 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "fc7_mbox_conf_perm" + type: "Permute" + bottom: "fc7_mbox_conf" + top: "fc7_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "fc7_mbox_conf_flat" + type: "Flatten" + bottom: "fc7_mbox_conf_perm" + top: "fc7_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "fc7_mbox_priorbox" + type: "PriorBox" + bottom: "fc7" + bottom: "data" + top: "fc7_mbox_priorbox" + prior_box_param { + min_size: 60.0 + max_size: 111.0 + aspect_ratio: 2 + aspect_ratio: 3 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 16 + offset: 0.5 + } +} +layer { + engine: "MKL2017" + name: "conv6_2_mbox_loc" + type: "Convolution" + bottom: "conv6_2" + top: "conv6_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv6_2_mbox_loc_perm" + type: "Permute" + bottom: "conv6_2_mbox_loc" + top: "conv6_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv6_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv6_2_mbox_loc_perm" + top: "conv6_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv6_2_mbox_conf" + type: "Convolution" + bottom: "conv6_2" + top: "conv6_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 126 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv6_2_mbox_conf_perm" + type: "Permute" + bottom: "conv6_2_mbox_conf" + top: "conv6_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv6_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv6_2_mbox_conf_perm" + top: "conv6_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv6_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv6_2" + bottom: "data" + top: "conv6_2_mbox_priorbox" + prior_box_param { + min_size: 111.0 + max_size: 162.0 + aspect_ratio: 2 + aspect_ratio: 3 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 32 + offset: 0.5 + } +} +layer { + name: "conv7_2_mbox_loc" + type: "Convolution" + bottom: "conv7_2" + top: "conv7_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv7_2_mbox_loc_perm" + type: "Permute" + bottom: "conv7_2_mbox_loc" + top: "conv7_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv7_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv7_2_mbox_loc_perm" + top: "conv7_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + engine: "MKL2017" + name: "conv7_2_mbox_conf" + type: 
"Convolution" + bottom: "conv7_2" + top: "conv7_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 126 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv7_2_mbox_conf_perm" + type: "Permute" + bottom: "conv7_2_mbox_conf" + top: "conv7_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv7_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv7_2_mbox_conf_perm" + top: "conv7_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv7_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv7_2" + bottom: "data" + top: "conv7_2_mbox_priorbox" + prior_box_param { + min_size: 162.0 + max_size: 213.0 + aspect_ratio: 2 + aspect_ratio: 3 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 64 + offset: 0.5 + } +} +layer { + engine: "MKL2017" + name: "conv8_2_mbox_loc" + type: "Convolution" + bottom: "conv8_2" + top: "conv8_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv8_2_mbox_loc_perm" + type: "Permute" + bottom: "conv8_2_mbox_loc" + top: "conv8_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv8_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv8_2_mbox_loc_perm" + top: "conv8_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + engine: "MKL2017" + name: "conv8_2_mbox_conf" + type: "Convolution" + bottom: "conv8_2" + top: "conv8_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 84 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv8_2_mbox_conf_perm" + type: "Permute" + bottom: "conv8_2_mbox_conf" + top: "conv8_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv8_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv8_2_mbox_conf_perm" + top: "conv8_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv8_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv8_2" + bottom: "data" + top: "conv8_2_mbox_priorbox" + prior_box_param { + min_size: 213.0 + max_size: 264.0 + aspect_ratio: 2 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 100 + offset: 0.5 + } +} +layer { + name: "conv9_2_mbox_loc" + type: "Convolution" + bottom: "conv9_2" + top: "conv9_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv9_2_mbox_loc_perm" + type: "Permute" + bottom: "conv9_2_mbox_loc" + top: "conv9_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv9_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv9_2_mbox_loc_perm" + top: "conv9_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + engine: "MKL2017" + 
name: "conv9_2_mbox_conf" + type: "Convolution" + bottom: "conv9_2" + top: "conv9_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 84 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv9_2_mbox_conf_perm" + type: "Permute" + bottom: "conv9_2_mbox_conf" + top: "conv9_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv9_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv9_2_mbox_conf_perm" + top: "conv9_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv9_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv9_2" + bottom: "data" + top: "conv9_2_mbox_priorbox" + prior_box_param { + min_size: 264.0 + max_size: 315.0 + aspect_ratio: 2 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 300 + offset: 0.5 + } +} +layer { + name: "mbox_loc" + type: "Concat" + bottom: "conv4_3_norm_mbox_loc_flat" + bottom: "fc7_mbox_loc_flat" + bottom: "conv6_2_mbox_loc_flat" + bottom: "conv7_2_mbox_loc_flat" + bottom: "conv8_2_mbox_loc_flat" + bottom: "conv9_2_mbox_loc_flat" + top: "mbox_loc" + concat_param { + axis: 1 + } + engine: "CAFFE" +} +layer { + name: "mbox_conf" + type: "Concat" + bottom: "conv4_3_norm_mbox_conf_flat" + bottom: "fc7_mbox_conf_flat" + bottom: "conv6_2_mbox_conf_flat" + bottom: "conv7_2_mbox_conf_flat" + bottom: "conv8_2_mbox_conf_flat" + bottom: "conv9_2_mbox_conf_flat" + top: "mbox_conf" + concat_param { + axis: 1 + } + engine: "CAFFE" +} +layer { + name: "mbox_priorbox" + type: "Concat" + bottom: "conv4_3_norm_mbox_priorbox" + bottom: "fc7_mbox_priorbox" + bottom: "conv6_2_mbox_priorbox" + bottom: "conv7_2_mbox_priorbox" + bottom: "conv8_2_mbox_priorbox" + bottom: "conv9_2_mbox_priorbox" + top: "mbox_priorbox" + concat_param { + axis: 2 + } + engine: "CAFFE" +} diff --git a/models/intel_optimized_models/ssd/VGGNet/VOC0712/SSD_300x300/deploy_mkldnn.prototxt b/models/intel_optimized_models/ssd/VGGNet/VOC0712/SSD_300x300/deploy_mkldnn.prototxt new file mode 100644 index 000000000..754549d27 --- /dev/null +++ b/models/intel_optimized_models/ssd/VGGNet/VOC0712/SSD_300x300/deploy_mkldnn.prototxt @@ -0,0 +1,1626 @@ +name: "VGG_VOC0712_SSD_300x300_deploy" +input: "data" +input_shape { + dim: 1 + dim: 3 + dim: 300 + dim: 300 +} +layer { + engine: "MKLDNN" + name: "conv1_1" + type: "Convolution" + bottom: "data" + top: "conv1_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "relu1_1" + type: "ReLU" + bottom: "conv1_1" + top: "conv1_1" +} +layer { + engine: "MKLDNN" + name: "conv1_2" + type: "Convolution" + bottom: "conv1_1" + top: "conv1_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "relu1_2" + type: "ReLU" + bottom: "conv1_2" + top: "conv1_2" +} +layer { + engine: "MKLDNN" + name: "pool1" + type: "Pooling" + bottom: "conv1_2" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + 
engine: "MKLDNN" + name: "conv2_1" + type: "Convolution" + bottom: "pool1" + top: "conv2_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "relu2_1" + type: "ReLU" + bottom: "conv2_1" + top: "conv2_1" +} +layer { + engine: "MKLDNN" + name: "conv2_2" + type: "Convolution" + bottom: "conv2_1" + top: "conv2_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "relu2_2" + type: "ReLU" + bottom: "conv2_2" + top: "conv2_2" +} +layer { + engine: "MKLDNN" + name: "pool2" + type: "Pooling" + bottom: "conv2_2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + engine: "MKLDNN" + name: "conv3_1" + type: "Convolution" + bottom: "pool2" + top: "conv3_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "relu3_1" + type: "ReLU" + bottom: "conv3_1" + top: "conv3_1" +} +layer { + engine: "MKLDNN" + name: "conv3_2" + type: "Convolution" + bottom: "conv3_1" + top: "conv3_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "relu3_2" + type: "ReLU" + bottom: "conv3_2" + top: "conv3_2" +} +layer { + engine: "MKLDNN" + name: "conv3_3" + type: "Convolution" + bottom: "conv3_2" + top: "conv3_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "relu3_3" + type: "ReLU" + bottom: "conv3_3" + top: "conv3_3" +} +layer { + engine: "MKLDNN" + name: "pool3" + type: "Pooling" + bottom: "conv3_3" + top: "pool3" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + engine: "MKLDNN" + name: "conv4_1" + type: "Convolution" + bottom: "pool3" + top: "conv4_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "relu4_1" + type: "ReLU" + bottom: "conv4_1" + top: "conv4_1" +} +layer { + engine: "MKLDNN" + name: "conv4_2" + type: "Convolution" + bottom: "conv4_1" + top: "conv4_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "relu4_2" + type: "ReLU" + bottom: "conv4_2" + top: "conv4_2" +} +layer { + engine: "MKLDNN" + name: "conv4_3" + type: "Convolution" 
+ bottom: "conv4_2" + top: "conv4_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "relu4_3" + type: "ReLU" + bottom: "conv4_3" + top: "conv4_3" +} +layer { + engine: "MKLDNN" + name: "pool4" + type: "Pooling" + bottom: "conv4_3" + top: "pool4" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + engine: "MKLDNN" + name: "conv5_1" + type: "Convolution" + bottom: "pool4" + top: "conv5_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + dilation: 1 + } +} +layer { + engine: "MKLDNN" + name: "relu5_1" + type: "ReLU" + bottom: "conv5_1" + top: "conv5_1" +} +layer { + engine: "MKLDNN" + name: "conv5_2" + type: "Convolution" + bottom: "conv5_1" + top: "conv5_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + dilation: 1 + } +} +layer { + engine: "MKLDNN" + name: "relu5_2" + type: "ReLU" + bottom: "conv5_2" + top: "conv5_2" +} +layer { + engine: "MKLDNN" + name: "conv5_3" + type: "Convolution" + bottom: "conv5_2" + top: "conv5_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + dilation: 1 + } +} +layer { + engine: "MKLDNN" + name: "relu5_3" + type: "ReLU" + bottom: "conv5_3" + top: "conv5_3" +} +layer { + engine: "MKLDNN" + name: "pool5" + type: "Pooling" + bottom: "conv5_3" + top: "pool5" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 1 + pad: 1 + } +} +layer { + engine: "MKLDNN" + name: "fc6" + type: "Convolution" + bottom: "pool5" + top: "fc6" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 1024 + pad: 6 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + dilation: 6 + } +} +layer { + engine: "MKLDNN" + name: "relu6" + type: "ReLU" + bottom: "fc6" + top: "fc6" +} +layer { + engine: "MKLDNN" + name: "fc7" + type: "Convolution" + bottom: "fc6" + top: "fc7" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 1024 + kernel_size: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "relu7" + type: "ReLU" + bottom: "fc7" + top: "fc7" +} +layer { + engine: "MKLDNN" + name: "conv6_1" + type: "Convolution" + bottom: "fc7" + top: "conv6_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "conv6_1_relu" + type: "ReLU" + bottom: "conv6_1" + top: "conv6_1" +} +layer { + engine: "MKLDNN" + name: "conv6_2" + type: "Convolution" + bottom: "conv6_1" + 
top: "conv6_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "conv6_2_relu" + type: "ReLU" + bottom: "conv6_2" + top: "conv6_2" +} +layer { + engine: "MKLDNN" + name: "conv7_1" + type: "Convolution" + bottom: "conv6_2" + top: "conv7_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "conv7_1_relu" + type: "ReLU" + bottom: "conv7_1" + top: "conv7_1" +} +layer { + engine: "MKLDNN" + name: "conv7_2" + type: "Convolution" + bottom: "conv7_1" + top: "conv7_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "conv7_2_relu" + type: "ReLU" + bottom: "conv7_2" + top: "conv7_2" +} +layer { + engine: "MKLDNN" + name: "conv8_1" + type: "Convolution" + bottom: "conv7_2" + top: "conv8_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "conv8_1_relu" + type: "ReLU" + bottom: "conv8_1" + top: "conv8_1" +} +layer { + engine: "MKLDNN" + name: "conv8_2" + type: "Convolution" + bottom: "conv8_1" + top: "conv8_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 0 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "conv8_2_relu" + type: "ReLU" + bottom: "conv8_2" + top: "conv8_2" +} +layer { + engine: "MKLDNN" + name: "conv9_1" + type: "Convolution" + bottom: "conv8_2" + top: "conv9_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 0 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "conv9_1_relu" + type: "ReLU" + bottom: "conv9_1" + top: "conv9_1" +} +layer { + engine: "MKLDNN" + name: "conv9_2" + type: "Convolution" + bottom: "conv9_1" + top: "conv9_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 0 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + engine: "MKLDNN" + name: "conv9_2_relu" + type: "ReLU" + bottom: "conv9_2" + top: "conv9_2" +} +layer { + name: "conv4_3_norm" + type: "Normalize" + bottom: "conv4_3" + top: "conv4_3_norm" + norm_param { + across_spatial: false + scale_filler { + type: "constant" + value: 20 + } + channel_shared: false + } +} +layer { + name: "conv4_3_norm_mbox_loc" + type: "Convolution" + bottom: "conv4_3_norm" + top: "conv4_3_norm_mbox_loc" + param { + 
lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv4_3_norm_mbox_loc_perm" + type: "Permute" + bottom: "conv4_3_norm_mbox_loc" + top: "conv4_3_norm_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv4_3_norm_mbox_loc_flat" + type: "Flatten" + bottom: "conv4_3_norm_mbox_loc_perm" + top: "conv4_3_norm_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv4_3_norm_mbox_conf" + type: "Convolution" + bottom: "conv4_3_norm" + top: "conv4_3_norm_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 84 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv4_3_norm_mbox_conf_perm" + type: "Permute" + bottom: "conv4_3_norm_mbox_conf" + top: "conv4_3_norm_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv4_3_norm_mbox_conf_flat" + type: "Flatten" + bottom: "conv4_3_norm_mbox_conf_perm" + top: "conv4_3_norm_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv4_3_norm_mbox_priorbox" + type: "PriorBox" + bottom: "conv4_3_norm" + bottom: "data" + top: "conv4_3_norm_mbox_priorbox" + prior_box_param { + min_size: 30.0 + max_size: 60.0 + aspect_ratio: 2 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 8 + offset: 0.5 + } +} +layer { + engine: "MKLDNN" + name: "fc7_mbox_loc" + type: "Convolution" + bottom: "fc7" + top: "fc7_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "fc7_mbox_loc_perm" + type: "Permute" + bottom: "fc7_mbox_loc" + top: "fc7_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "fc7_mbox_loc_flat" + type: "Flatten" + bottom: "fc7_mbox_loc_perm" + top: "fc7_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "fc7_mbox_conf" + type: "Convolution" + bottom: "fc7" + top: "fc7_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 126 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "fc7_mbox_conf_perm" + type: "Permute" + bottom: "fc7_mbox_conf" + top: "fc7_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "fc7_mbox_conf_flat" + type: "Flatten" + bottom: "fc7_mbox_conf_perm" + top: "fc7_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "fc7_mbox_priorbox" + type: "PriorBox" + bottom: "fc7" + bottom: "data" + top: "fc7_mbox_priorbox" + prior_box_param { + min_size: 60.0 + max_size: 111.0 + aspect_ratio: 2 + aspect_ratio: 3 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 16 + offset: 0.5 + } +} +layer { + engine: "MKLDNN" + name: "conv6_2_mbox_loc" + type: "Convolution" + bottom: "conv6_2" + top: "conv6_2_mbox_loc" + 
param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv6_2_mbox_loc_perm" + type: "Permute" + bottom: "conv6_2_mbox_loc" + top: "conv6_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv6_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv6_2_mbox_loc_perm" + top: "conv6_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv6_2_mbox_conf" + type: "Convolution" + bottom: "conv6_2" + top: "conv6_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 126 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv6_2_mbox_conf_perm" + type: "Permute" + bottom: "conv6_2_mbox_conf" + top: "conv6_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv6_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv6_2_mbox_conf_perm" + top: "conv6_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv6_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv6_2" + bottom: "data" + top: "conv6_2_mbox_priorbox" + prior_box_param { + min_size: 111.0 + max_size: 162.0 + aspect_ratio: 2 + aspect_ratio: 3 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 32 + offset: 0.5 + } +} +layer { + name: "conv7_2_mbox_loc" + type: "Convolution" + bottom: "conv7_2" + top: "conv7_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 24 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv7_2_mbox_loc_perm" + type: "Permute" + bottom: "conv7_2_mbox_loc" + top: "conv7_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv7_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv7_2_mbox_loc_perm" + top: "conv7_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + engine: "MKLDNN" + name: "conv7_2_mbox_conf" + type: "Convolution" + bottom: "conv7_2" + top: "conv7_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 126 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv7_2_mbox_conf_perm" + type: "Permute" + bottom: "conv7_2_mbox_conf" + top: "conv7_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv7_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv7_2_mbox_conf_perm" + top: "conv7_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv7_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv7_2" + bottom: "data" + top: "conv7_2_mbox_priorbox" + prior_box_param { + min_size: 162.0 + max_size: 213.0 + aspect_ratio: 2 + aspect_ratio: 3 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 64 + offset: 0.5 + } +} +layer { + engine: "MKLDNN" + name: "conv8_2_mbox_loc" + type: "Convolution" + bottom: "conv8_2" 
+ top: "conv8_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv8_2_mbox_loc_perm" + type: "Permute" + bottom: "conv8_2_mbox_loc" + top: "conv8_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv8_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv8_2_mbox_loc_perm" + top: "conv8_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + engine: "MKLDNN" + name: "conv8_2_mbox_conf" + type: "Convolution" + bottom: "conv8_2" + top: "conv8_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 84 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv8_2_mbox_conf_perm" + type: "Permute" + bottom: "conv8_2_mbox_conf" + top: "conv8_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv8_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv8_2_mbox_conf_perm" + top: "conv8_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv8_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv8_2" + bottom: "data" + top: "conv8_2_mbox_priorbox" + prior_box_param { + min_size: 213.0 + max_size: 264.0 + aspect_ratio: 2 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 100 + offset: 0.5 + } +} +layer { + name: "conv9_2_mbox_loc" + type: "Convolution" + bottom: "conv9_2" + top: "conv9_2_mbox_loc" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv9_2_mbox_loc_perm" + type: "Permute" + bottom: "conv9_2_mbox_loc" + top: "conv9_2_mbox_loc_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv9_2_mbox_loc_flat" + type: "Flatten" + bottom: "conv9_2_mbox_loc_perm" + top: "conv9_2_mbox_loc_flat" + flatten_param { + axis: 1 + } +} +layer { + engine: "MKLDNN" + name: "conv9_2_mbox_conf" + type: "Convolution" + bottom: "conv9_2" + top: "conv9_2_mbox_conf" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 84 + pad: 1 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "conv9_2_mbox_conf_perm" + type: "Permute" + bottom: "conv9_2_mbox_conf" + top: "conv9_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 3 + order: 1 + } +} +layer { + name: "conv9_2_mbox_conf_flat" + type: "Flatten" + bottom: "conv9_2_mbox_conf_perm" + top: "conv9_2_mbox_conf_flat" + flatten_param { + axis: 1 + } +} +layer { + name: "conv9_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv9_2" + bottom: "data" + top: "conv9_2_mbox_priorbox" + prior_box_param { + min_size: 264.0 + max_size: 315.0 + aspect_ratio: 2 + flip: true + clip: false + variance: 0.1 + variance: 0.1 + variance: 0.2 + variance: 0.2 + step: 300 + offset: 0.5 + } +} +layer { + name: "mbox_loc" + type: "Concat" + bottom: "conv4_3_norm_mbox_loc_flat" + 
bottom: "fc7_mbox_loc_flat" + bottom: "conv6_2_mbox_loc_flat" + bottom: "conv7_2_mbox_loc_flat" + bottom: "conv8_2_mbox_loc_flat" + bottom: "conv9_2_mbox_loc_flat" + top: "mbox_loc" + concat_param { + axis: 1 + } + engine: "CAFFE" +} +layer { + name: "mbox_conf" + type: "Concat" + bottom: "conv4_3_norm_mbox_conf_flat" + bottom: "fc7_mbox_conf_flat" + bottom: "conv6_2_mbox_conf_flat" + bottom: "conv7_2_mbox_conf_flat" + bottom: "conv8_2_mbox_conf_flat" + bottom: "conv9_2_mbox_conf_flat" + top: "mbox_conf" + concat_param { + axis: 1 + } + engine: "CAFFE" +} +layer { + name: "mbox_priorbox" + type: "Concat" + bottom: "conv4_3_norm_mbox_priorbox" + bottom: "fc7_mbox_priorbox" + bottom: "conv6_2_mbox_priorbox" + bottom: "conv7_2_mbox_priorbox" + bottom: "conv8_2_mbox_priorbox" + bottom: "conv9_2_mbox_priorbox" + top: "mbox_priorbox" + concat_param { + axis: 2 + } + engine: "CAFFE" +} diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index bf492a24b..c53299d26 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -3,13 +3,13 @@ if(NOT HAVE_PYTHON) return() endif() -include_directories(${PYTHON_INCLUDE_DIRS} ${NUMPY_INCLUDE_DIR} ${Boost_INCLUDE_DIRS}) file(GLOB_RECURSE python_srcs ${PROJECT_SOURCE_DIR}/python/*.cpp) add_library(pycaffe SHARED ${python_srcs}) -target_link_libraries(pycaffe ${Caffe_LINK} ${PYTHON_LIBRARIES} ${Boost_LIBRARIES}) -set_target_properties(pycaffe PROPERTIES PREFIX "" OUTPUT_NAME "_caffe") caffe_default_properties(pycaffe) +set_target_properties(pycaffe PROPERTIES PREFIX "" OUTPUT_NAME "_caffe") +target_include_directories(pycaffe PUBLIC ${PYTHON_INCLUDE_DIRS} ${NUMPY_INCLUDE_DIR}) +target_link_libraries(pycaffe PUBLIC ${Caffe_LINK} ${PYTHON_LIBRARIES}) if(UNIX OR APPLE) set(__linkname "${PROJECT_SOURCE_DIR}/python/caffe/_caffe.so") diff --git a/python/caffe/__init__.py b/python/caffe/__init__.py index a823b52e8..34c939a5b 100755 --- a/python/caffe/__init__.py +++ b/python/caffe/__init__.py @@ -35,7 +35,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# from .pycaffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, RMSPropSolver, AdaDeltaSolver, AdamSolver -from ._caffe import set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list, set_random_seed +from ._caffe import init_log, log, set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list, set_random_seed from ._caffe import __version__ from .proto.caffe_pb2 import TRAIN, TEST from .classifier import Classifier diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index b7d509ee5..b9dc23e24 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -88,6 +88,23 @@ const int NPY_DTYPE = NPY_FLOAT32; void set_mode_cpu() { Caffe::set_mode(Caffe::CPU); } void set_mode_gpu() { Caffe::set_mode(Caffe::GPU); } +void InitLog() { + ::google::InitGoogleLogging(""); + ::google::InstallFailureSignalHandler(); +} +void InitLogLevel(int level) { + FLAGS_minloglevel = level; + InitLog(); +} +void InitLogLevelPipe(int level, bool stderr) { + FLAGS_minloglevel = level; + FLAGS_logtostderr = stderr; + InitLog(); +} +void Log(const string& s) { + LOG(INFO) << s; +} + void set_random_seed(unsigned int seed) { Caffe::set_random_seed(seed); } // For convenience, check that input files can be opened, and raise an @@ -327,6 +344,10 @@ BOOST_PYTHON_MODULE(_caffe) { bp::scope().attr("__version__") = AS_STRING(CAFFE_VERSION); // Caffe utility functions + bp::def("init_log", &InitLog); + bp::def("init_log", &InitLogLevel); + bp::def("init_log", &InitLogLevelPipe); + bp::def("log", &Log); bp::def("set_mode_cpu", &set_mode_cpu); bp::def("set_mode_gpu", &set_mode_gpu); bp::def("set_random_seed", &set_random_seed); diff --git a/python/caffe/draw.py b/python/caffe/draw.py index 46ef510a0..1f3ab6f7d 100755 --- a/python/caffe/draw.py +++ b/python/caffe/draw.py @@ -127,11 +127,11 @@ def get_layer_label(layer, rankdir): separator, layer.type, separator, - layer.convolution_param.kernel_size[0] if len(layer.convolution_param.kernel_size._values) else 1, + layer.convolution_param.kernel_size[0] if len(layer.convolution_param.kernel_size) else 1, separator, - layer.convolution_param.stride[0] if len(layer.convolution_param.stride._values) else 1, + layer.convolution_param.stride[0] if len(layer.convolution_param.stride) else 1, separator, - layer.convolution_param.pad[0] if len(layer.convolution_param.pad._values) else 0) + layer.convolution_param.pad[0] if len(layer.convolution_param.pad) else 0) elif layer.type == 'Pooling': pooling_types_dict = get_pooling_types_dict() node_label = '"%s%s(%s %s)%skernel size: %d%sstride: %d%spad: %d"' %\ diff --git a/python/caffe/io.py b/python/caffe/io.py index 72a2fc682..0df78e7f3 100755 --- a/python/caffe/io.py +++ b/python/caffe/io.py @@ -117,7 +117,7 @@ def array_to_datum(arr, label=None): if arr.dtype == np.uint8: datum.data = arr.tostring() else: - datum.float_data.extend(arr.flat) + datum.float_data.extend(arr.astype(float).flat) if label is not None: datum.label = label return datum @@ -303,7 +303,7 @@ def set_mean(self, in_, mean): m_min, m_max = mean.min(), mean.max() normal_mean = (mean - m_min) / (m_max - m_min) mean = resize_image(normal_mean.transpose((1,2,0)),in_shape[1:]).transpose((2,0,1)) * (m_max - m_min) + m_min - #aise ValueError('Mean shape incompatible with input shape.') + #raise ValueError('Mean shape incompatible with input shape.') self.mean[in_] = mean def set_input_scale(self, in_, scale): diff --git a/python/caffe/net_spec.py b/python/caffe/net_spec.py index b8d568dcb..10ee4d4f1 100755 --- 
a/python/caffe/net_spec.py +++ b/python/caffe/net_spec.py @@ -142,6 +142,10 @@ class Function(object): def __init__(self, type_name, inputs, params): self.type_name = type_name + for index, input in enumerate(inputs): + if not isinstance(input, Top): + raise TypeError('%s input %d is not a Top (type is %s)' % + (type_name, index, type(input))) self.inputs = inputs self.params = params self.ntop = self.params.get('ntop', 1) diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py index d105c3f27..bc606148d 100755 --- a/python/caffe/pycaffe.py +++ b/python/caffe/pycaffe.py @@ -79,6 +79,16 @@ def _Net_blob_loss_weights(self): self._blob_loss_weights)) return self._blob_loss_weights_dict +@property +def _Net_layer_dict(self): + """ + An OrderedDict (bottom to top, i.e., input to output) of network + layers indexed by name + """ + if not hasattr(self, '_layer_dict'): + self._layer_dict = OrderedDict(zip(self._layer_names, self.layers)) + return self._layer_dict + @property def _Net_params(self): @@ -139,7 +149,7 @@ def _Net_forward(self, blobs=None, start=None, end=None, **kwargs): if end is not None: end_ind = list(self._layer_names).index(end) - outputs = set([end] + blobs) + outputs = set(self.top_names[end] + blobs) else: end_ind = len(self.layers) - 1 outputs = set(self.outputs + blobs) @@ -187,7 +197,7 @@ def _Net_backward(self, diffs=None, start=None, end=None, **kwargs): if end is not None: end_ind = list(self._layer_names).index(end) - outputs = set([end] + diffs) + outputs = set(self.bottom_names[end] + diffs) else: end_ind = 0 outputs = set(self.inputs + diffs) @@ -357,6 +367,7 @@ def get_id_name(self): # Attach methods to Net. Net.blobs = _Net_blobs Net.blob_loss_weights = _Net_blob_loss_weights +Net.layer_dict = _Net_layer_dict Net.params = _Net_params Net.forward = _Net_forward Net.backward = _Net_backward diff --git a/python/caffe/test/test_draw.py b/python/caffe/test/test_draw.py new file mode 100644 index 000000000..835bb5df0 --- /dev/null +++ b/python/caffe/test/test_draw.py @@ -0,0 +1,37 @@ +import os +import unittest + +from google.protobuf import text_format + +import caffe.draw +from caffe.proto import caffe_pb2 + +def getFilenames(): + """Yields files in the source tree which are Net prototxts.""" + result = [] + + root_dir = os.path.abspath(os.path.join( + os.path.dirname(__file__), '..', '..', '..')) + assert os.path.exists(root_dir) + + for dirname in ('models', 'examples'): + dirname = os.path.join(root_dir, dirname) + assert os.path.exists(dirname) + for cwd, _, filenames in os.walk(dirname): + for filename in filenames: + filename = os.path.join(cwd, filename) + if filename.endswith('.prototxt') and 'solver' not in filename: + yield os.path.join(dirname, filename) + + +class TestDraw(unittest.TestCase): + def test_draw_net(self): + for filename in getFilenames(): + net = caffe_pb2.NetParameter() + with open(filename) as infile: + text_format.Merge(infile.read(), net) + caffe.draw.draw_net(net, 'LR') + + +if __name__ == "__main__": + unittest.main() diff --git a/python/caffe/test/test_net.py b/python/caffe/test/test_net.py index 85845e6c6..04198f06d 100755 --- a/python/caffe/test/test_net.py +++ b/python/caffe/test/test_net.py @@ -61,11 +61,11 @@ def simple_net_file(num_output): bias_filler { type: 'constant' value: 2 } } param { decay_mult: 1 } param { decay_mult: 0 } } - layer { type: 'InnerProduct' name: 'ip' bottom: 'conv' top: 'ip' + layer { type: 'InnerProduct' name: 'ip' bottom: 'conv' top: 'ip_blob' inner_product_param { num_output: """ + 
str(num_output) + """ weight_filler { type: 'gaussian' std: 2.5 } bias_filler { type: 'constant' value: -3 } } } - layer { type: 'SoftmaxWithLoss' name: 'loss' bottom: 'ip' bottom: 'label' + layer { type: 'SoftmaxWithLoss' name: 'loss' bottom: 'ip_blob' bottom: 'label' top: 'loss' }""") f.close() return f.name @@ -111,10 +111,35 @@ def test_memory(self): for bl in blobs: total += bl.data.sum() + bl.diff.sum() + def test_layer_dict(self): + layer_dict = self.net.layer_dict + self.assertEqual(list(layer_dict.keys()), list(self.net._layer_names)) + for i, name in enumerate(self.net._layer_names): + self.assertEqual(layer_dict[name].type, + self.net.layers[i].type) + def test_forward_backward(self): self.net.forward() self.net.backward() + def test_forward_start_end(self): + conv_blob=self.net.blobs['conv']; + ip_blob=self.net.blobs['ip_blob']; + sample_data=np.random.uniform(size=conv_blob.data.shape); + sample_data=sample_data.astype(np.float32); + conv_blob.data[:]=sample_data; + forward_blob=self.net.forward(start='ip',end='ip'); + self.assertIn('ip_blob',forward_blob); + + manual_forward=[]; + for i in range(0,conv_blob.data.shape[0]): + dot=np.dot(self.net.params['ip'][0].data, + conv_blob.data[i].reshape(-1)); + manual_forward.append(dot+self.net.params['ip'][1].data); + manual_forward=np.array(manual_forward); + + np.testing.assert_allclose(ip_blob.data,manual_forward,rtol=1e-3); + def test_clear_param_diffs(self): # Run a forward/backward step to have non-zero diffs self.net.forward() @@ -134,13 +159,13 @@ def test_top_bottom_names(self): self.assertEqual(self.net.top_names, OrderedDict([('data', ['data', 'label']), ('conv', ['conv']), - ('ip', ['ip']), + ('ip', ['ip_blob']), ('loss', ['loss'])])) self.assertEqual(self.net.bottom_names, OrderedDict([('data', []), ('conv', ['data']), ('ip', ['conv']), - ('loss', ['ip', 'label'])])) + ('loss', ['ip_blob', 'label'])])) def test_save_and_read(self): f = tempfile.NamedTemporaryFile(mode='w+', delete=False) @@ -224,12 +249,12 @@ class TestLevels(unittest.TestCase): """ def setUp(self): - self.f = tempfile.NamedTemporaryFile(mode='w+') + self.f = tempfile.NamedTemporaryFile(mode='w+', delete=False) self.f.write(self.TEST_NET) - self.f.flush() + self.f.close() def tearDown(self): - self.f.close() + os.remove(self.f.name) def check_net(self, net, blobs): net_blobs = [b for b in net.blobs.keys() if 'data' not in b] @@ -289,12 +314,12 @@ class TestStages(unittest.TestCase): """ def setUp(self): - self.f = tempfile.NamedTemporaryFile(mode='w+') + self.f = tempfile.NamedTemporaryFile(mode='w+', delete=False) self.f.write(self.TEST_NET) - self.f.flush() + self.f.close() def tearDown(self): - self.f.close() + os.remove(self.f.name) def check_net(self, net, blobs): net_blobs = [b for b in net.blobs.keys() if 'data' not in b] @@ -371,12 +396,12 @@ class TestAllInOne(unittest.TestCase): """ def setUp(self): - self.f = tempfile.NamedTemporaryFile(mode='w+') + self.f = tempfile.NamedTemporaryFile(mode='w+', delete=False) self.f.write(self.TEST_NET) - self.f.flush() + self.f.close() def tearDown(self): - self.f.close() + os.remove(self.f.name) def check_net(self, net, outputs): self.assertEqual(list(net.blobs['data'].shape), [1,1,10,10]) diff --git a/python/caffe/test/test_net_spec.py b/python/caffe/test/test_net_spec.py index d1b1f0af9..36520c2a5 100755 --- a/python/caffe/test/test_net_spec.py +++ b/python/caffe/test/test_net_spec.py @@ -115,3 +115,11 @@ def test_zero_tops(self): net_proto = silent_net() net = self.load_net(net_proto) 
self.assertEqual(len(net.forward()), 0) + + def test_type_error(self): + """Test that a TypeError is raised when a Function input isn't a Top.""" + data = L.DummyData(ntop=2) # data is a 2-tuple of Tops + r = r"^Silence input 0 is not a Top \(type is <(type|class) 'tuple'>\)$" + with self.assertRaisesRegexp(TypeError, r): + L.Silence(data, ntop=0) # should raise: data is a tuple, not a Top + L.Silence(*data, ntop=0) # shouldn't raise: each elt of data is a Top diff --git a/src/caffe/data_reader.cpp b/src/caffe/data_reader.cpp index 01ebe50c8..69e8f20f5 100644 --- a/src/caffe/data_reader.cpp +++ b/src/caffe/data_reader.cpp @@ -44,7 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "caffe/data_reader.hpp" #include "caffe/layers/data_layer.hpp" #include "caffe/proto/caffe.pb.h" - +#include "caffe/multinode/mlsl.hpp" namespace caffe { using boost::weak_ptr; @@ -99,7 +99,7 @@ DataReader::QueuePair::~QueuePair() { DataReader::Body::Body(const LayerParameter& param) : param_(param), - new_queue_pairs_() { + new_queue_pairs_(), first_read_(true) { StartInternalThread(); } @@ -147,18 +147,17 @@ void DataReader::Body::read_one(DBWrapper* dbw, QueuePair* qp) { CHECK(dbw); CHECK(qp); -#ifdef CAFFE_MLSL_SHUFFLE +#ifdef USE_MLSL string* data = qp->free_.pop(); - static int mb=0; - if(!mb) { /* move each node’s file position to its node ID – this part can be move to the initialization */ - for(int i=0;i<mn::get_node_id();i++) { dbw->Next(); } - mb = 1; + if (first_read_) { + for (int i = 0; i < mn::get_node_id(); i++) { dbw->Next(); } + first_read_ = false; } *data = dbw->value(); qp->full_.push(data); for(int i=0;i<mn::get_nodes_count();i++) { dbw->Next(); } #else @@ -191,8 +190,17 @@ DataReader::DBShuffle::DBShuffle(const LayerParameter& param):DBWrapper(param) { // randomly shuffle data LOG(INFO) << "Shuffling data"; +#ifdef USE_MLSL + mn::Distribution * distrib = mn::get_distrib(); + float fetch_seed; + fetch_seed = static_cast<float>(caffe_rng_rand() % 15); + distrib->bcast(&fetch_seed, 1); + LOG(INFO) << "Random seed for shuffling: " << fetch_seed; + prefetch_rng_.reset(new Caffe::RNG(static_cast<unsigned int>(fetch_seed))); +#else const unsigned int prefetch_rng_seed = caffe_rng_rand(); prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed)); +#endif ShuffleImages(); } diff --git a/src/caffe/data_transformer.cpp b/src/caffe/data_transformer.cpp index 7f4fbc830..5185308e8 100644 --- a/src/caffe/data_transformer.cpp +++ b/src/caffe/data_transformer.cpp @@ -172,14 +172,24 @@ void DataTransformer<Dtype>::Transform(const Datum& datum, template <typename Dtype> template <typename RandNumbers> -void DataTransformer<Dtype>::Transform(const Datum& datum, +void DataTransformer<Dtype>::Transform(const Datum& datum_in, Dtype* transformed_data, NormalizedBBox* crop_bbox, RandNumbers& rand_num) { - const string& data = datum.data(); - const int datum_channels = datum.channels(); - const int datum_height = datum.height(); - const int datum_width = datum.width(); + const Datum *datum = &datum_in; + Datum resized_datum; + if (param_.has_random_resize_param()) { +#ifdef USE_OPENCV + RandomResizeImage(datum_in, &resized_datum); + datum = &resized_datum; +#else + LOG(FATAL) << "Random image resizing requires OpenCV; compile with USE_OPENCV."; +#endif + } + const string& data = datum->data(); + const int datum_channels = datum->channels(); + const int datum_height = datum->height(); + const int datum_width = datum->width(); const int crop_size = param_.crop_size(); const Dtype scale = param_.scale(); @@ -245,7 +255,7 @@ void DataTransformer<Dtype>::Transform(const Datum& datum, datum_element = static_cast<Dtype>(static_cast<uint8_t>(data[data_index])); } else { - datum_element = datum.float_data(data_index); + datum_element = datum->float_data(data_index); } if (has_mean_file) { transformed_data[top_index] = @@ -756,10 +766,20 @@ void DataTransformer<Dtype>::Transform(const cv::Mat& cv_img, template <typename Dtype> template <typename RandNumbers> -void DataTransformer<Dtype>::Transform(const cv::Mat& cv_img, +void DataTransformer<Dtype>::Transform(const cv::Mat& cv_img_in, Blob<Dtype>* transformed_blob, NormalizedBBox* crop_bbox, RandNumbers& rand_num) { + const cv::Mat *cv_img = &cv_img_in; + cv::Mat resized_img; + if (param_.has_random_resize_param()) { +#ifdef USE_OPENCV + RandomResizeImage(cv_img_in, &resized_img); + cv_img = &resized_img; +#else + LOG(FATAL) << "Random image resizing requires OpenCV; compile with USE_OPENCV."; +#endif + } const int crop_size = param_.crop_size(); - const int img_channels = cv_img.channels(); + const int img_channels = cv_img->channels(); // Check dimensions. const int channels = transformed_blob->channels(); @@ -770,7 +790,7 @@ void DataTransformer<Dtype>::Transform(const cv::Mat& cv_img, CHECK_EQ(channels, img_channels); CHECK_GE(num, 1); - CHECK(cv_img.depth() == CV_8U) << "Image data type must be unsigned byte"; + CHECK(cv_img->depth() == CV_8U) << "Image data type must be unsigned byte"; const Dtype scale = param_.scale(); @@ -793,9 +813,9 @@ void DataTransformer<Dtype>::Transform(const cv::Mat& cv_img, } cv::Mat cv_resized_img, cv_noised_img; if (param_.has_resize_param()) { - cv_resized_img = ApplyResize(cv_img, param_.resize_param()); + cv_resized_img = ApplyResize(*cv_img, param_.resize_param()); } else { - cv_resized_img = cv_img; + cv_resized_img = *cv_img; } if (param_.has_noise_param()) { cv_noised_img = ApplyNoise(cv_resized_img, param_.noise_param()); @@ -809,7 +829,7 @@ void DataTransformer<Dtype>::Transform(const cv::Mat& cv_img, int h_off = 0; int w_off = 0; - cv::Mat cv_cropped_img = cv_img; + cv::Mat cv_cropped_img = *cv_img; if (crop_size) { CHECK_EQ(crop_size, height); CHECK_EQ(crop_size, width); @@ -822,7 +842,7 @@ void DataTransformer<Dtype>::Transform(const cv::Mat& cv_img, w_off = (img_width - crop_size) / 2; } cv::Rect roi(w_off, h_off, crop_size, crop_size); - cv_cropped_img = cv_img(roi); + cv_cropped_img = (*cv_img)(roi); } else { cv_cropped_img = cv_noised_img; } @@ -1035,6 +1055,42 @@ void DataTransformer<Dtype>::ExpandImage(const cv::Mat& img, img.copyTo((*expand_img)(bbox_roi)); } +template <typename Dtype> +void DataTransformer<Dtype>::RandomResizeImage(const Datum& datum, Datum *resized_datum) { + shared_ptr<cv::Mat> img; + if (datum.encoded()) { + img = shared_ptr<cv::Mat>(new cv::Mat(DecodeDatumToCVMatNative(datum))); + } else { + img = shared_ptr<cv::Mat>(new cv::Mat( + cv::Size(datum.width(), datum.height()), + CV_8UC(datum.channels()), + (void*)datum.data().data())); + } + cv::Mat resized_img; + RandomResizeImage(*img, &resized_img); + CVMatToDatum(resized_img, resized_datum); +} + +template <typename Dtype> +void DataTransformer<Dtype>::RandomResizeImage(const cv::Mat& img, cv::Mat *resized_img) { + int h = img.size().height; + int w = img.size().width; + int min_size = param_.random_resize_param().min_size(); + int max_size = param_.random_resize_param().max_size(); + ResizeParameter resize_param = param_.random_resize_param().resize_param(); + if (min_size == 0) min_size = std::min(h,w); + if (max_size == 0) max_size = std::max(h,w); + int shorter_size = rand_num_(max_size - min_size + 1) + min_size; + resize_param.set_height(shorter_size); + resize_param.set_width(shorter_size); + if (h < w) { + resize_param.set_width(int(float(w) / h * shorter_size)); + } else { + resize_param.set_height(int(float(h) / w * shorter_size)); + } + *resized_img = ApplyResize(img, resize_param); +} + #endif // USE_OPENCV
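[Editor's note] RandomResizeImage above draws the target shorter-side length uniformly from [min_size, max_size] and rescales the longer side to preserve the aspect ratio. A minimal standalone sketch of that arithmetic, assuming a hypothetical rand_uniform(n) that returns a uniform int in [0, n-1] (neither function is part of the patch):

    // Sketch only: mirrors the shorter-side logic of RandomResizeImage.
    void random_resize_dims(int h, int w, int min_size, int max_size,
                            int* out_h, int* out_w) {
      int shorter = rand_uniform(max_size - min_size + 1) + min_size;
      if (h < w) {  // height is the shorter side
        *out_h = shorter;
        *out_w = int(float(w) / h * shorter);
      } else {      // width is the shorter side (or the image is square)
        *out_w = shorter;
        *out_h = int(float(h) / w * shorter);
      }
    }

For example, a 480x640 input with min_size=256 and max_size=480 that draws shorter=300 is resized to 300x400.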
template <typename Dtype> diff --git a/src/caffe/layers/mkl_batch_norm_layer.cpp b/src/caffe/layers/mkl_batch_norm_layer.cpp index 8a1e44ab8..6dce50243 100755 --- a/src/caffe/layers/mkl_batch_norm_layer.cpp +++ b/src/caffe/layers/mkl_batch_norm_layer.cpp @@ -467,11 +467,12 @@ void MKLBatchNormLayer<Dtype>::Backward_cpu( CHECK_EQ(e, E_SUCCESS); if (use_weight_bias_) { - caffe_cpu_copy(this->blobs_[3]->count(), - diffScaleShift_buffer_, this->blobs_[3]->mutable_cpu_diff()); + caffe_cpu_axpby(this->blobs_[3]->count(), (Dtype)1., + diffScaleShift_buffer_, (Dtype)1., this->blobs_[3]->mutable_cpu_diff()); if (bias_term_) - caffe_cpu_copy(this->blobs_[4]->count(), - diffScaleShift_buffer_ + channels_, this->blobs_[4]->mutable_cpu_diff()); + caffe_cpu_axpby(this->blobs_[4]->count(), (Dtype)1., + diffScaleShift_buffer_ + channels_, + (Dtype)1., this->blobs_[4]->mutable_cpu_diff()); else caffe_set(this->blobs_[4]->count(), static_cast<Dtype>(0), this->blobs_[4]->mutable_cpu_diff()); diff --git a/src/caffe/layers/mkldnn_batch_norm_layer.cpp b/src/caffe/layers/mkldnn_batch_norm_layer.cpp index dd1b7f7b6..4db92b943 100644 --- a/src/caffe/layers/mkldnn_batch_norm_layer.cpp +++ b/src/caffe/layers/mkldnn_batch_norm_layer.cpp @@ -246,8 +246,13 @@ void MKLDNNBatchNormLayer<Dtype>::InitBatchNorm(const vector<Blob<Dtype>*>& bott } } - fwd_bottom_data->set_mkldnn_primitive(BatchNormFwd); - fwd_top_data->set_mkldnn_primitive(BatchNormFwd); + //fwd_bottom_data->set_mkldnn_primitive(BatchNormFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> fwd_bottom_data_primitive_transfer(input_primitive); + fwd_bottom_data->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); + + //fwd_top_data->set_mkldnn_primitive(BatchNormFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> fwd_top_data_memory_transfer(output_memory); + fwd_top_data->set_mkldnn_primitive(fwd_top_data_memory_transfer); //Fix: MKLDNN batch norm only support 4D memory descriptor! Use 4D for calculation and reshape to 2D for output! bool has_spatial = (bottom[0]->shape().size() != 2); @@ -259,8 +264,8 @@ void MKLDNNBatchNormLayer<Dtype>::InitBatchNorm(const vector<Blob<Dtype>*>& bott #ifdef DEBUG LOG(INFO) << "size of bottom blob: " << bottom[0]->shape().size(); LOG(INFO) << "MKLDNN batch norm only support 4D memory descriptor! Use 4D for calculation and reshape to 2D for output!"; -#endif - vector<int> top_shape; +#endif + vector<int> top_shape; top_shape.push_back(bottom[0]->num()); top_shape.push_back(bottom[0]->channels()); top[0]->Reshape(top_shape); @@ -413,8 +418,13 @@ void MKLDNNBatchNormLayer<Dtype>::InitBatchNormBwd( *bwd_top_diff_primitive, *bwd_bottom_diff_memory)); } - bwd_top_diff->set_mkldnn_primitive(BatchNormBwd); - bwd_bottom_diff->set_mkldnn_primitive(BatchNormBwd); + //bwd_top_diff->set_mkldnn_primitive(BatchNormBwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> bwd_top_diff_primitive_transfer(bwd_top_diff_primitive); + bwd_top_diff->set_mkldnn_primitive(bwd_top_diff_primitive_transfer); + + //bwd_bottom_diff->set_mkldnn_primitive(BatchNormBwd); //Wrong passed primitive! (TODO: Checking!)
+ MKLDNNPrimitive<Dtype> bwd_bottom_diff_memory_transfer(bwd_bottom_diff_memory); + bwd_bottom_diff->set_mkldnn_primitive(bwd_bottom_diff_memory_transfer); } template <typename Dtype> diff --git a/src/caffe/layers/mkldnn_concat_layer.cpp b/src/caffe/layers/mkldnn_concat_layer.cpp index ee2cc5026..a0a1cd487 100644 --- a/src/caffe/layers/mkldnn_concat_layer.cpp +++ b/src/caffe/layers/mkldnn_concat_layer.cpp @@ -101,7 +101,7 @@ void MKLDNNConcatLayer<Dtype>::InitConcatFwd(const vector<Blob<Dtype>*>& bottom, LOG(INFO) << "size of bottom blob: " << bottom[0]->shape().size(); LOG(INFO) << "size of top blob: " << top[0]->shape().size(); LOG(INFO) << "MKLDNN concat layer only support 4D blob as input! Reshape the 2D input blob into 4D for calculation!"; -#endif +#endif vector<int> bottom_4D_shape; int bottom_4D_height = 1; int bottom_4D_width = 1; @@ -168,9 +168,13 @@ void MKLDNNConcatLayer<Dtype>::InitConcatFwd(const vector<Blob<Dtype>*>& bottom, concatFwd.reset(new concat(*concatFwd_pd, fwd_input_primitives_at_, *fwd_output_memory)); for (auto i = 0; i < num_concats_; i++) { - fwd_bottom_data[i]->set_mkldnn_primitive(concatFwd); + //fwd_bottom_data[i]->set_mkldnn_primitive(concatFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> fwd_bottom_data_primitive_transfer(fwd_input_primitives_[i]); + fwd_bottom_data[i]->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); } - fwd_top_data->set_mkldnn_primitive(concatFwd); + //fwd_top_data->set_mkldnn_primitive(concatFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> fwd_top_data_memory_transfer(fwd_output_memory); + fwd_top_data->set_mkldnn_primitive(fwd_top_data_memory_transfer); } template <typename Dtype> @@ -237,11 +241,14 @@ void MKLDNNConcatLayer<Dtype>::InitConcatBwd(const vector<Blob<Dtype>*>& top, offsets[concat_dimension] += dims[concat_dimension]; - bwd_bottom_diff[i]->set_mkldnn_primitive(reorders[i]); + //bwd_bottom_diff[i]->set_mkldnn_primitive(reorders[i]); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> bwd_bottom_diff_memory_transfer(bwd_reorder_output_memory[i]); + bwd_bottom_diff[i]->set_mkldnn_primitive(bwd_bottom_diff_memory_transfer); } - bwd_top_diff->set_mkldnn_primitive(reorders[0]); - + //bwd_top_diff->set_mkldnn_primitive(reorders[0]); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> bwd_top_diff_memory_transfer(bwd_reorder_input_memory); + bwd_top_diff->set_mkldnn_primitive(bwd_top_diff_memory_transfer); } template <typename Dtype> diff --git a/src/caffe/layers/mkldnn_convolution_layer.cpp b/src/caffe/layers/mkldnn_convolution_layer.cpp index f6b79532b..d65dbf3bf 100644 --- a/src/caffe/layers/mkldnn_convolution_layer.cpp +++ b/src/caffe/layers/mkldnn_convolution_layer.cpp @@ -261,7 +261,9 @@ void MKLDNNConvolutionLayer<Dtype>::InitConvolutionFwd(const vector<Blob<Dtype>* , *fwd_bottom_data_primitive, *fwd_weights_data_primitive , *fwd_bias_data_primitive, *fwd_top_data_memory)); } - fwd_bias_data->set_mkldnn_primitive(convFwd); + //fwd_bias_data->set_mkldnn_primitive(convFwd); //Wrong passed primitive! (For sure!) + MKLDNNPrimitive<Dtype> fwd_bias_data_primitive_transfer(fwd_bias_data_primitive); + fwd_bias_data->set_mkldnn_primitive(fwd_bias_data_primitive_transfer); } else { if(relu) { convFwd.reset(new convolution_relu_forward(*convReluFwd_pd @@ -273,11 +275,13 @@ void MKLDNNConvolutionLayer<Dtype>::InitConvolutionFwd(const vector<Blob<Dtype>* , *fwd_top_data_memory)); } } - fwd_bottom_data->set_mkldnn_primitive(convFwd); //Wrong passed primitive! (TODO: Checking!)
- //MKLDNNPrimitive<Dtype> fwd_bottom_data_primitive_transfer(fwd_bottom_data_primitive); - //fwd_bottom_data->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); + //fwd_bottom_data->set_mkldnn_primitive(convFwd); //Wrong passed primitive! (For sure!) + MKLDNNPrimitive<Dtype> fwd_bottom_data_primitive_transfer(fwd_bottom_data_primitive); + fwd_bottom_data->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); - fwd_top_data->set_mkldnn_primitive(convFwd); + //fwd_top_data->set_mkldnn_primitive(convFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> fwd_top_data_memory_transfer(fwd_top_data_memory); + fwd_top_data->set_mkldnn_primitive(fwd_top_data_memory_transfer); //fwd_weights_data->set_mkldnn_primitive(convFwd); //Wrong passed primitive! (For sure!) MKLDNNPrimitive<Dtype> fwd_weights_data_primitive_transfer(fwd_weights_data_primitive); fwd_weights_data->set_mkldnn_primitive(fwd_weights_data_primitive_transfer); @@ -447,7 +451,9 @@ void MKLDNNConvolutionLayer<Dtype>::InitConvolutionBwd(const vector<Blob<Dtype>* , *bwdw_bottom_data_primitive, *bwdw_top_diff_primitive , *bwdw_weights_diff_memory, *bwdw_bias_diff_memory)); - bwdw_bias_diff->set_mkldnn_primitive(convBwdWeights); + //bwdw_bias_diff->set_mkldnn_primitive(convBwdWeights); //Wrong passed primitive! (For sure!) + MKLDNNPrimitive<Dtype> bwdw_bias_diff_memory_transfer(bwdw_bias_diff_memory); + bwdw_bias_diff->set_mkldnn_primitive(bwdw_bias_diff_memory_transfer); } else { convBwdWeights.reset(new convolution_backward_weights(*convBwdWeights_pd , *bwdw_bottom_data_primitive, *bwdw_top_diff_primitive , *bwdw_weights_diff_memory)); } @@ -458,26 +464,30 @@ void MKLDNNConvolutionLayer<Dtype>::InitConvolutionBwd(const vector<Blob<Dtype>* , *bwdd_top_diff_primitive, *bwdd_weights_data_primitive , *bwdd_bottom_diff_memory)); - bwdd_bottom_diff->set_mkldnn_primitive(convBwdData); + //bwdd_bottom_diff->set_mkldnn_primitive(convBwdData); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> bwdd_bottom_diff_memory_transfer(bwdd_bottom_diff_memory); + bwdd_bottom_diff->set_mkldnn_primitive(bwdd_bottom_diff_memory_transfer); - bwdd_top_diff->set_mkldnn_primitive(convBwdData); //Wrong passed primitive! (TODO: Checking!) - //MKLDNNPrimitive<Dtype> bwdd_top_diff_primitive_transfer(bwdd_top_diff_primitive); - //bwdd_top_diff->set_mkldnn_primitive(bwdd_top_diff_primitive_transfer); + //bwdd_top_diff->set_mkldnn_primitive(convBwdData); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> bwdd_top_diff_primitive_transfer(bwdd_top_diff_primitive); + bwdd_top_diff->set_mkldnn_primitive(bwdd_top_diff_primitive_transfer); //bwdd_weights_data->set_mkldnn_primitive(convBwdData); //Wrong passed primitive! (For sure!) MKLDNNPrimitive<Dtype> bwdd_weights_data_primitive_transfer(bwdd_weights_data_primitive); bwdd_weights_data->set_mkldnn_primitive(bwdd_weights_data_primitive_transfer); - bwdw_bottom_data->set_mkldnn_primitive(convBwdWeights); //Wrong passed primitive! (TODO: Checking!) - //MKLDNNPrimitive<Dtype> bwdw_bottom_data_primitive_transfer(bwdw_bottom_data_primitive); - //bwdw_bottom_data->set_mkldnn_primitive(bwdw_bottom_data_primitive_transfer); + //bwdw_bottom_data->set_mkldnn_primitive(convBwdWeights); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> bwdw_bottom_data_primitive_transfer(bwdw_bottom_data_primitive); + bwdw_bottom_data->set_mkldnn_primitive(bwdw_bottom_data_primitive_transfer); - bwdw_top_diff->set_mkldnn_primitive(convBwdWeights); //Wrong passed primitive! (TODO: Checking!)
- //MKLDNNPrimitive<Dtype> bwdw_top_diff_primitive_transfer(bwdw_top_diff_primitive); - //bwdw_top_diff->set_mkldnn_primitive(bwdw_top_diff_primitive_transfer); + //bwdw_top_diff->set_mkldnn_primitive(convBwdWeights); //Wrong passed primitive! (For sure!) + MKLDNNPrimitive<Dtype> bwdw_top_diff_primitive_transfer(bwdw_top_diff_primitive); + bwdw_top_diff->set_mkldnn_primitive(bwdw_top_diff_primitive_transfer); - bwdw_weights_diff->set_mkldnn_primitive(convBwdWeights); + //bwdw_weights_diff->set_mkldnn_primitive(convBwdWeights); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> bwdw_weights_diff_memory_transfer(bwdw_weights_diff_memory); + bwdw_weights_diff->set_mkldnn_primitive(bwdw_weights_diff_memory_transfer); // Names are for debugging purposes only. } diff --git a/src/caffe/layers/mkldnn_eltwise_layer.cpp b/src/caffe/layers/mkldnn_eltwise_layer.cpp index 2a4a87c79..060467e82 100644 --- a/src/caffe/layers/mkldnn_eltwise_layer.cpp +++ b/src/caffe/layers/mkldnn_eltwise_layer.cpp @@ -201,9 +201,13 @@ void MKLDNNEltwiseLayer<Dtype>::InitEltwiseFwd(const vector<Blob<Dtype>*>& botto for (auto i = 0; i < num_bottoms_; i++) { - fwd_bottom_data[i]->set_mkldnn_primitive(eltwiseFwd); + //fwd_bottom_data[i]->set_mkldnn_primitive(eltwiseFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> fwd_bottom_data_primitive_transfer(fwd_bottom_data_primitives_[i]); + fwd_bottom_data[i]->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); } - fwd_top_data->set_mkldnn_primitive(eltwiseFwd); + //fwd_top_data->set_mkldnn_primitive(eltwiseFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> fwd_top_data_memory_transfer(fwd_top_data_memory); + fwd_top_data->set_mkldnn_primitive(fwd_top_data_memory_transfer); } @@ -214,9 +218,9 @@ void MKLDNNEltwiseLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, if(eltwiseFwd_pd == NULL) InitEltwiseFwd(bottom, top); - for (auto i = 0; i < num_bottoms_; i++) - { - // making reorders if needed. + for (auto i = 0; i < num_bottoms_; i++) + { + // making reorders if needed. fwd_bottom_data[i]->sync_before_read(); } // update top that head at prv @@ -233,13 +237,13 @@ void MKLDNNEltwiseLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top , const vector<bool>& propagate_down , const vector<Blob<Dtype>*>& bottom) { - VLOG(1) << "MKLDNNEltwiseLayer<Dtype>::Backward_cpu: " << this->layer_param_.name(); - - for (int i = 0; i < num_bottoms_; ++i) - { - //Eltwise layer is not supporting multiplication coefficient in Backward due to lack of supporting scale and copy primitives in MKL-DNN - CHECK_EQ(coeffs_[i], Dtype(1)) << "Not supported yet"; - + VLOG(1) << "MKLDNNEltwiseLayer<Dtype>::Backward_cpu: " << this->layer_param_.name(); + + for (int i = 0; i < num_bottoms_; ++i) + { + //Eltwise layer is not supporting multiplication coefficient in Backward due to lack of supporting scale and copy primitives in MKL-DNN + CHECK_EQ(coeffs_[i], Dtype(1)) << "Not supported yet"; + bottom[i]->ShareDiff(*top[0]); } } diff --git a/src/caffe/layers/mkldnn_inner_product_layer.cpp b/src/caffe/layers/mkldnn_inner_product_layer.cpp index d2fe6cfaa..1c92669c1 100644 --- a/src/caffe/layers/mkldnn_inner_product_layer.cpp +++ b/src/caffe/layers/mkldnn_inner_product_layer.cpp @@ -235,18 +235,24 @@ void MKLDNNInnerProductLayer<Dtype>::InitInnerProductFwd(const vector<Blob<Dtype - fwd_bottom_data->set_mkldnn_primitive(ipFwd); //Wrong passed primitive! (TODO: Checking!)
- //MKLDNNPrimitive<Dtype> fwd_bottom_data_primitive_transfer(fwd_bottom_data_primitive); - //fwd_bottom_data->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); + //fwd_bottom_data->set_mkldnn_primitive(ipFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> fwd_bottom_data_primitive_transfer(fwd_bottom_data_primitive); + fwd_bottom_data->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); - fwd_top_data->set_mkldnn_primitive(ipFwd); - - fwd_weights_data->set_mkldnn_primitive(ipFwd); //Wrong passed primitive! (TODO: Checking!) - //MKLDNNPrimitive<Dtype> fwd_weights_data_primitive_transfer(fwd_weights_data_primitive); - //fwd_weights_data->set_mkldnn_primitive(fwd_weights_data_primitive_transfer); + //fwd_top_data->set_mkldnn_primitive(ipFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> fwd_top_data_memory_transfer(fwd_top_data_memory); + fwd_top_data->set_mkldnn_primitive(fwd_top_data_memory_transfer); - if (this->bias_term_) - fwd_bias_data->set_mkldnn_primitive(ipFwd); + //fwd_weights_data->set_mkldnn_primitive(ipFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> fwd_weights_data_primitive_transfer(fwd_weights_data_primitive); + fwd_weights_data->set_mkldnn_primitive(fwd_weights_data_primitive_transfer); + + if (this->bias_term_) + { + //fwd_bias_data->set_mkldnn_primitive(ipFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> fwd_bias_data_primitive_transfer(fwd_bias_data_primitive); + fwd_bias_data->set_mkldnn_primitive(fwd_bias_data_primitive_transfer); + } } template <typename Dtype> @@ -416,29 +422,37 @@ void MKLDNNInnerProductLayer<Dtype>::InitInnerProductBwd(const vector<Blob<Dtype - bwdd_bottom_diff->set_mkldnn_primitive(ipBwdData); + //bwdd_bottom_diff->set_mkldnn_primitive(ipBwdData); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> bwdd_bottom_diff_memory_transfer(bwdd_bottom_diff_memory); + bwdd_bottom_diff->set_mkldnn_primitive(bwdd_bottom_diff_memory_transfer); - bwdd_top_diff->set_mkldnn_primitive(ipBwdData); //Wrong passed primitive! (TODO: Checking!) - //MKLDNNPrimitive<Dtype> bwdd_top_diff_primitive_transfer(bwdd_top_diff_primitive); - //bwdd_top_diff->set_mkldnn_primitive(bwdd_top_diff_primitive_transfer); + //bwdd_top_diff->set_mkldnn_primitive(ipBwdData); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> bwdd_top_diff_primitive_transfer(bwdd_top_diff_primitive); + bwdd_top_diff->set_mkldnn_primitive(bwdd_top_diff_primitive_transfer); - bwdd_weights_data->set_mkldnn_primitive(ipBwdData); //Wrong passed primitive! (TODO: Checking!) - //MKLDNNPrimitive<Dtype> bwdd_weights_data_primitive_transfer(bwdd_weights_data_primitive); - //bwdd_weights_data->set_mkldnn_primitive(bwdd_weights_data_primitive_transfer); + //bwdd_weights_data->set_mkldnn_primitive(ipBwdData); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> bwdd_weights_data_primitive_transfer(bwdd_weights_data_primitive); + bwdd_weights_data->set_mkldnn_primitive(bwdd_weights_data_primitive_transfer); - bwdw_bottom_data->set_mkldnn_primitive(ipBwdWeights); //Wrong passed primitive! (TODO: Checking!) - //MKLDNNPrimitive<Dtype> bwdw_bottom_data_primitive_transfer(bwdw_bottom_data_primitive); - //bwdw_bottom_data->set_mkldnn_primitive(bwdw_bottom_data_primitive_transfer); + //bwdw_bottom_data->set_mkldnn_primitive(ipBwdWeights); //Wrong passed primitive! (TODO: Checking!)
+ MKLDNNPrimitive<Dtype> bwdw_bottom_data_primitive_transfer(bwdw_bottom_data_primitive); + bwdw_bottom_data->set_mkldnn_primitive(bwdw_bottom_data_primitive_transfer); - bwdw_top_diff->set_mkldnn_primitive(ipBwdWeights); //Wrong passed primitive! (TODO: Checking!) - //MKLDNNPrimitive<Dtype> bwdw_top_diff_primitive_transfer(bwdw_top_diff_primitive); - //bwdw_top_diff->set_mkldnn_primitive(bwdw_top_diff_primitive_transfer); + //bwdw_top_diff->set_mkldnn_primitive(ipBwdWeights); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> bwdw_top_diff_primitive_transfer(bwdw_top_diff_primitive); + bwdw_top_diff->set_mkldnn_primitive(bwdw_top_diff_primitive_transfer); - bwdw_weights_diff->set_mkldnn_primitive(ipBwdWeights); + //bwdw_weights_diff->set_mkldnn_primitive(ipBwdWeights); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> bwdw_weights_diff_memory_transfer(bwdw_weights_diff_memory); + bwdw_weights_diff->set_mkldnn_primitive(bwdw_weights_diff_memory_transfer); if (this->bias_term_) - bwdw_bias_diff->set_mkldnn_primitive(ipBwdWeights); + { + //bwdw_bias_diff->set_mkldnn_primitive(ipBwdWeights); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> bwdw_bias_diff_memory_transfer(bwdw_bias_diff_memory); + bwdw_bias_diff->set_mkldnn_primitive(bwdw_bias_diff_memory_transfer); + } } @@ -482,9 +496,9 @@ void MKLDNNInnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& to else { LOG(INFO) << "Debug: Top prv diff is NULL!"; - LOG(INFO) << "Debug: Top cpu diff: " << *top[0]->cpu_diff(); - } - + LOG(INFO) << "Debug: Top cpu diff: " << *top[0]->cpu_diff(); + } + if (this->blobs_[0]->prv_data() != NULL) { LOG(INFO) << "Debug: Weights prv data from blobs_[0]: " << *this->blobs_[0]->prv_data(); @@ -492,9 +506,9 @@ void MKLDNNInnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& to else { LOG(INFO) << "Debug: Weights prv data is NULL!"; - LOG(INFO) << "Debug: Weights cpu data: " << *this->blobs_[0]->cpu_data(); - } - //Before submit, so get_prv_ptr() always has the value + LOG(INFO) << "Debug: Weights cpu data: " << *this->blobs_[0]->cpu_data(); + } + //Before submit, so get_prv_ptr() always has the value LOG(INFO) << "Debug: Weights prv data from get_prv_ptr: " << *bwdd_weights_data->get_prv_ptr(); #endif ipBwdData.submit(); @@ -505,8 +519,8 @@ void MKLDNNInnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& to } else { - LOG(INFO) << "Debug: Bottom prv diff is NULL!"; - LOG(INFO) << "Debug: Bottom cpu diff: " << *bottom[0]->cpu_diff(); + LOG(INFO) << "Debug: Bottom prv diff is NULL!"; + LOG(INFO) << "Debug: Bottom cpu diff: " << *bottom[0]->cpu_diff(); } #endif PERFORMANCE_MEASUREMENT_END_ID(perf_id_bw_); diff --git a/src/caffe/layers/mkldnn_lrn_layer.cpp b/src/caffe/layers/mkldnn_lrn_layer.cpp index c5eb48d1c..6c589c73e 100644 --- a/src/caffe/layers/mkldnn_lrn_layer.cpp +++ b/src/caffe/layers/mkldnn_lrn_layer.cpp @@ -198,8 +198,13 @@ void MKLDNNLRNLayer<Dtype>::InitLRNFwd(const vector<Blob<Dtype>*>& bottom, const } else { lrnFwd.reset(new lrn_forward(*lrnFwd_pd, *fwd_bottom_data_primitive, *fwd_top_data_memory)); } - fwd_bottom_data->set_mkldnn_primitive(lrnFwd); - fwd_top_data->set_mkldnn_primitive(lrnFwd); + //fwd_bottom_data->set_mkldnn_primitive(lrnFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> fwd_bottom_data_primitive_transfer(fwd_bottom_data_primitive); + fwd_bottom_data->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); + + //fwd_top_data->set_mkldnn_primitive(lrnFwd); //Wrong passed primitive! (TODO: Checking!)
+ MKLDNNPrimitive<Dtype> fwd_top_data_memory_transfer(fwd_top_data_memory); + fwd_top_data->set_mkldnn_primitive(fwd_top_data_memory_transfer); } @@ -340,8 +345,13 @@ void MKLDNNLRNLayer<Dtype>::InitLRNBwd(const vector<Blob<Dtype>*>& top bwd_top_diff_primitive = bwd_top_diff->create_input(false); lrnBwd.reset(new lrn_backward(*lrnBwd_pd, *fwd_bottom_data_primitive, *bwd_top_diff_primitive, *scratch_memory, *bwd_bottom_diff_memory)); - bwd_bottom_diff->set_mkldnn_primitive(lrnBwd); - bwd_top_diff->set_mkldnn_primitive(lrnBwd); + //bwd_bottom_diff->set_mkldnn_primitive(lrnBwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> bwd_bottom_diff_memory_transfer(bwd_bottom_diff_memory); + bwd_bottom_diff->set_mkldnn_primitive(bwd_bottom_diff_memory_transfer); + + //bwd_top_diff->set_mkldnn_primitive(lrnBwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> bwd_top_diff_primitive_transfer(bwd_top_diff_primitive); + bwd_top_diff->set_mkldnn_primitive(bwd_top_diff_primitive_transfer); } diff --git a/src/caffe/layers/mkldnn_pooling_layer.cpp b/src/caffe/layers/mkldnn_pooling_layer.cpp index 849abd0f3..4a54a2efc 100644 --- a/src/caffe/layers/mkldnn_pooling_layer.cpp +++ b/src/caffe/layers/mkldnn_pooling_layer.cpp @@ -216,18 +216,20 @@ void MKLDNNPoolingLayer<Dtype>::InitPoolingFwd(const vector<Blob<Dtype>*>& botto // ---- Initialize memory descriptors ------------- typedef typename memory::primitive_desc MemPD; // short name for memory::primitive_desc - memory::format cmfmt = mfmt_nchw; + + shared_ptr<MemPD> usr_bottom_data_mpd(new MemPD({{bottom_tz}, mpcsn, mfmt_nchw}, cpu_engine)); + shared_ptr<MemPD> usr_top_data_mpd(new MemPD({{top_tz}, mpcsn, mfmt_nchw}, cpu_engine)); + if (bottom_data_is_prv) { shared_ptr<MKLDNNMemoryDescriptor<Dtype, false> > mem_descr = get_mkldnn_prv_descriptor<Dtype, false>(bottom[0]); cmfmt = static_cast<memory::format>(mem_descr->prv_memory_pd()->desc().data.format); + mpcsn = static_cast<memory::data_type>(mem_descr->prv_memory_pd()->desc().data.data_type); } shared_ptr<memory::desc> init_fwd_bottom_md(new memory::desc({bottom_tz}, mpcsn, cmfmt)); shared_ptr<memory::desc> init_fwd_top_md(new memory::desc({top_tz}, mpcsn, cmfmt)); - shared_ptr<MemPD> usr_bottom_data_mpd(new MemPD({{bottom_tz}, mpcsn, mfmt_nchw}, cpu_engine)); - shared_ptr<MemPD> usr_top_data_mpd(new MemPD({{top_tz}, mpcsn, mfmt_nchw}, cpu_engine)); // ---- Initialize pooling primitive descriptor ------------- pooling_forward::desc poolingFwd_desc(propagation, pooling_algorithm, *init_fwd_bottom_md,*init_fwd_top_md , {sh, sw}, {kh, kw}, {pt, pl}, {pb, pr}, padding_kind::zero); @@ -275,7 +277,7 @@ void MKLDNNPoolingLayer<Dtype>::InitPoolingFwd(const vector<Blob<Dtype>*>& botto fwd_top_data.reset(new MKLDNNData<Dtype>(usr_top_data_mpd, prv_fwd_top_data_mpd, top[0], this)); fwd_top_data_memory = fwd_top_data->create_output_memory(); - if ( propagation == prop_kind::forward_training && + if (propagation == prop_kind::forward_training && pooling_algorithm != algorithm::pooling_avg_exclude_padding && pooling_algorithm != algorithm::pooling_avg_include_padding) { indices_pd.reset(new MemPD(poolingFwd_pd->workspace_primitive_desc())); @@ -284,8 +286,13 @@ void MKLDNNPoolingLayer<Dtype>::InitPoolingFwd(const vector<Blob<Dtype>*>& botto } else { poolingFwd.reset(new pooling_forward(*poolingFwd_pd, *fwd_bottom_data_primitive, *fwd_top_data_memory)); } - fwd_bottom_data->set_mkldnn_primitive(poolingFwd); - fwd_top_data->set_mkldnn_primitive(poolingFwd); + //fwd_bottom_data->set_mkldnn_primitive(poolingFwd); //Wrong passed primitive! (TODO: Checking!)
+ MKLDNNPrimitive<Dtype> fwd_bottom_data_primitive_transfer(fwd_bottom_data_primitive); + fwd_bottom_data->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); + + //fwd_top_data->set_mkldnn_primitive(poolingFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> fwd_top_data_memory_transfer(fwd_top_data_memory); + fwd_top_data->set_mkldnn_primitive(fwd_top_data_memory_transfer); } // TODO(Yangqing): Is there a faster way to do pooling in the channel-first @@ -418,7 +425,7 @@ void MKLDNNPoolingLayer<Dtype>::InitPoolingBwd(const vector<Blob<Dtype>*>& top // ---- Initialize remaining memory descriptors ------------- shared_ptr<MemPD> prv_bwd_bottom_diff_mpd, prv_bwd_top_diff_mpd; - if (top_diff_is_prv) { + if (top_diff_is_prv || bottom_data_is_prv) { prv_bwd_bottom_diff_mpd.reset(new MemPD(*init_bwd_bottom_md, engine)); prv_bwd_top_diff_mpd.reset(new MemPD(*init_bwd_top_md, engine)); } @@ -440,8 +447,13 @@ void MKLDNNPoolingLayer<Dtype>::InitPoolingBwd(const vector<Blob<Dtype>*>& top else poolingBwd.reset(new pooling_backward(*poolingBwd_pd, *bwd_top_diff_primitive, *bwd_bottom_diff_memory)); - bwd_bottom_diff->set_mkldnn_primitive(poolingBwd); - bwd_top_diff->set_mkldnn_primitive(poolingBwd); + //bwd_bottom_diff->set_mkldnn_primitive(poolingBwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> bwd_bottom_diff_memory_transfer(bwd_bottom_diff_memory); + bwd_bottom_diff->set_mkldnn_primitive(bwd_bottom_diff_memory_transfer); + + //bwd_top_diff->set_mkldnn_primitive(poolingBwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> bwd_top_diff_primitive_transfer(bwd_top_diff_primitive); + bwd_top_diff->set_mkldnn_primitive(bwd_top_diff_primitive_transfer); } template <typename Dtype> diff --git a/src/caffe/layers/mkldnn_relu_layer.cpp b/src/caffe/layers/mkldnn_relu_layer.cpp index 273e834d8..6e0f93b67 100644 --- a/src/caffe/layers/mkldnn_relu_layer.cpp +++ b/src/caffe/layers/mkldnn_relu_layer.cpp @@ -99,7 +99,10 @@ void MKLDNNReLULayer<Dtype>::InitReLUFwd(const vector<Blob<Dtype>*>& bottom, con top_data_md = bottom_data_md; // ---- Initialize relu primitive descriptor ------------- - relu_forward::desc reluFwd_desc(propagation, *bottom_data_md, negative_slope); + //relu_forward::desc reluFwd_desc(propagation, *bottom_data_md, negative_slope); + // MKLDNN is deprecating standalone relu primitive in MKL-DNN. + // Now MKLDNN has eltwise primitive with eltwise_relu algorithm inside. + eltwise_forward::desc eltwise_reluFwd_desc(propagation, eltwise_relu, *bottom_data_md, negative_slope); // ---- Determining engine to use ----------------------- std::string subengines = this->layer_param_.engine(); @@ -109,7 +112,7 @@ void MKLDNNReLULayer<Dtype>::InitReLUFwd(const vector<Blob<Dtype>*>& bottom, con unsigned subEngineIndex = 0; for(; subEngineIndex < ep.getNumberOfSubEngines(); subEngineIndex++) { try { - reluFwd_pd.reset(new relu_forward::primitive_desc(reluFwd_desc, + reluFwd_pd.reset(new relu_forward::primitive_desc(eltwise_reluFwd_desc, ep.getMKLDNNSubEngine(subEngineIndex))); } catch(...) { @@ -129,9 +132,13 @@ void MKLDNNReLULayer<Dtype>::InitReLUFwd(const vector<Blob<Dtype>*>& bottom, con fwd_top_data_memory = fwd_top_data->create_output_memory(inplace); reluFwd.reset(new relu_forward(*reluFwd_pd, *fwd_bottom_data_primitive, *fwd_top_data_memory)); - fwd_bottom_data->set_mkldnn_primitive(reluFwd); - fwd_top_data->set_mkldnn_primitive(reluFwd); + //fwd_bottom_data->set_mkldnn_primitive(reluFwd); //Wrong passed primitive! (TODO: Checking!)
+ MKLDNNPrimitive<Dtype> fwd_bottom_data_primitive_transfer(fwd_bottom_data_primitive); + fwd_bottom_data->set_mkldnn_primitive(fwd_bottom_data_primitive_transfer); + //fwd_top_data->set_mkldnn_primitive(reluFwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> fwd_top_data_memory_transfer(fwd_top_data_memory); + fwd_top_data->set_mkldnn_primitive(fwd_top_data_memory_transfer); } @@ -147,6 +154,11 @@ void MKLDNNReLULayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom bool inplace = (bottom[0] == top[0]); if( reluFwd_pd == NULL) InitReLUFwd(bottom, top); + + if(this->layer_param_.relu_param().fuse()) { + top[0]->ShareData(*bottom[0]); + return; + } // making reorders if needed. fwd_bottom_data->sync_before_read(); // update top that head at prv @@ -239,7 +251,10 @@ void MKLDNNReLULayer<Dtype>::InitReLUBwd(const vector<Blob<Dtype>*>& top bottom_diff_md = top_diff_md; // ---- Initialize relu primitive descriptor ------------- - relu_backward::desc reluBwd_desc(*top_diff_md, *top_data_md, negative_slope); + //relu_backward::desc reluBwd_desc(*top_diff_md, *top_data_md, negative_slope); + // MKLDNN is deprecating standalone relu primitive in MKL-DNN. + // Now MKLDNN has eltwise primitive with eltwise_relu algorithm inside. + eltwise_backward::desc eltwise_reluBwd_desc(eltwise_relu, *top_diff_md, *top_data_md, negative_slope); // ---- Determining engine to use ----------------------- std::string subengines = this->layer_param_.engine(); @@ -249,7 +264,7 @@ void MKLDNNReLULayer<Dtype>::InitReLUBwd(const vector<Blob<Dtype>*>& top unsigned subEngineIndex = 0; for(; subEngineIndex < ep.getNumberOfSubEngines(); subEngineIndex++) { try { - reluBwd_pd.reset(new relu_backward::primitive_desc(reluBwd_desc, + reluBwd_pd.reset(new relu_backward::primitive_desc(eltwise_reluBwd_desc, ep.getMKLDNNSubEngine(subEngineIndex), *reluFwd_pd)); } catch(...) { @@ -269,8 +284,13 @@ void MKLDNNReLULayer<Dtype>::InitReLUBwd(const vector<Blob<Dtype>*>& top bwd_bottom_diff_memory = bwd_bottom_diff->create_output_memory(inplace); reluBwd.reset(new relu_backward(*reluBwd_pd, *fwd_bottom_data_primitive, *bwd_top_diff_primitive, *bwd_bottom_diff_memory)); - bwd_top_diff->set_mkldnn_primitive(reluBwd); - bwd_bottom_diff->set_mkldnn_primitive(reluBwd); + //bwd_top_diff->set_mkldnn_primitive(reluBwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> bwd_top_diff_primitive_transfer(bwd_top_diff_primitive); + bwd_top_diff->set_mkldnn_primitive(bwd_top_diff_primitive_transfer); + + //bwd_bottom_diff->set_mkldnn_primitive(reluBwd); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive<Dtype> bwd_bottom_diff_memory_transfer(bwd_bottom_diff_memory); + bwd_bottom_diff->set_mkldnn_primitive(bwd_bottom_diff_memory_transfer); } template <typename Dtype> diff --git a/src/caffe/layers/mkldnn_split_layer.cpp b/src/caffe/layers/mkldnn_split_layer.cpp index 5e6cf9bab..ab2c5156a 100644 --- a/src/caffe/layers/mkldnn_split_layer.cpp +++ b/src/caffe/layers/mkldnn_split_layer.cpp @@ -163,10 +163,14 @@ void MKLDNNSplitLayer<Dtype>::InitSplitBwd(const vector<Blob<Dtype>*>& bottom, // there may be reorders to be done for inputs(tops' diffs) // so it match SplitBwd primitive inputs format expectations for(int i = 0; i < top.size(); ++i) { - bwd_top_diffs_[i]->set_mkldnn_primitive(splitBwd_); + //bwd_top_diffs_[i]->set_mkldnn_primitive(splitBwd_); //Wrong passed primitive! (TODO: Checking!)
+ MKLDNNPrimitive bwd_top_diff_primitive_transfer(bwd_top_diff_primitives_[i]); + bwd_top_diffs_[i]->set_mkldnn_primitive(bwd_top_diff_primitive_transfer); } - bwd_bottom_diff_->set_mkldnn_primitive(splitBwd_); + //bwd_bottom_diff_->set_mkldnn_primitive(splitBwd_); //Wrong passed primitive! (TODO: Checking!) + MKLDNNPrimitive bwd_bottom_diff_memory_transfer(bwd_bottom_diff_memory_); + bwd_bottom_diff_->set_mkldnn_primitive(bwd_bottom_diff_memory_transfer); } diff --git a/src/caffe/mkldnn_memory.cpp b/src/caffe/mkldnn_memory.cpp index ddad67f57..bacb6ae61 100644 --- a/src/caffe/mkldnn_memory.cpp +++ b/src/caffe/mkldnn_memory.cpp @@ -212,10 +212,11 @@ void MKLDNNMemoryDescriptor::convert_from_extprv(shared_ptr_reorder_extprv2prv_pd == NULL) return; - if (this->_extprv_memory_pd->desc().data.format == this->_prv_memory_pd->desc().data.format) + if (this->_extprv_memory_pd->desc().data.format == this->_prv_memory_pd->desc().data.format && + this->_extprv_memory_pd->desc().data.data_type == this->_prv_memory_pd->desc().data.data_type) { #ifdef DEBUG - LOG(INFO) << "The format of _extprv_memory_pd and _prv_memory_pd is same, no need do conversion."; + LOG(INFO) << "The format and data_type of _extprv_memory_pd and _prv_memory_pd is same, no need do conversion."; #endif return; } diff --git a/src/caffe/multinode/mlsl.cpp b/src/caffe/multinode/mlsl.cpp index 1653c5692..8a0f772af 100644 --- a/src/caffe/multinode/mlsl.cpp +++ b/src/caffe/multinode/mlsl.cpp @@ -41,26 +41,25 @@ #include "boost/thread/mutex.hpp" #include "caffe/multinode/mlsl.hpp" -namespace { - - __attribute__((constructor)) - void init(int argc, char **argv) { - static class initialize { - public: - initialize(int* argc, char** argv[]) { - MLSL::Environment::GetEnv().Init(argc, argv); - } - ~initialize() { - MLSL::Environment::GetEnv().Finalize(); - } - } __init{ &argc, &argv }; - } -} - namespace caffe { namespace mn { boost::mutex distrib_lock; - std::map, boost::shared_ptr> distrib_map; + std::map, boost::shared_ptr> *distrib_map; + + void init(int* argc, char **argv[]) { + static class initialize { + public: + initialize(int* argc, char** argv[]) { + MLSL::Environment::GetEnv().Init(argc, argv); + distrib_map = + new std::map, boost::shared_ptr>(); + } + ~initialize() { + delete distrib_map; + MLSL::Environment::GetEnv().Finalize(); + } + } __init{ argc, argv }; + } shared_ptr create_distrib( int dataParts, int modelParts, int dataColor, int modelColor, @@ -73,15 +72,15 @@ namespace caffe { Distribution * get_distrib(int dataParts, int modelParts) { boost::mutex::scoped_lock l(distrib_lock); std::pair key = std::make_pair(dataParts, modelParts); - if (distrib_map.find(key) == distrib_map.end()) { + if (distrib_map->find(key) == distrib_map->end()) { int node_id = get_node_id(); int num_nodes = get_nodes_count(); int modelColor = node_id / modelParts; int dataColor = node_id % (num_nodes / dataParts); - distrib_map[key] = boost::shared_ptr( + (*distrib_map)[key] = boost::shared_ptr( new Distribution(dataParts, modelParts, dataColor, modelColor)); } - return distrib_map[key].get(); + return (*distrib_map)[key].get(); } Distribution * get_distrib() { diff --git a/src/caffe/multinode/multi_solver.cpp b/src/caffe/multinode/multi_solver.cpp index 86e9b37ef..13ad8da2b 100644 --- a/src/caffe/multinode/multi_solver.cpp +++ b/src/caffe/multinode/multi_solver.cpp @@ -46,79 +46,141 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
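// A minimal standalone sketch of the initialization pattern introduced above
// (EnvInit/EnvFinalize are hypothetical stand-ins for the MLSL environment
// calls): instead of a GNU __attribute__((constructor)) function, which runs
// at load time and cannot portably see argc/argv, init() is called
// explicitly from main (see tools/caffe.cpp further down in this patch). The
// function-local static guard runs Init exactly once on the first call, and
// its destructor tears down the heap-owned map before Finalize at normal
// program exit, giving a well-defined construction/destruction order.
#include <cstdio>
#include <map>

static void EnvInit(int*, char***) { std::puts("Init"); }
static void EnvFinalize() { std::puts("Finalize"); }

std::map<int, int>* distrib_map = nullptr;  // heap-owned, as in the patch

void init(int* argc, char** argv[]) {
  static struct Guard {
    Guard(int* argc, char** argv[]) {
      EnvInit(argc, argv);
      distrib_map = new std::map<int, int>();
    }
    ~Guard() {
      delete distrib_map;
      EnvFinalize();
    }
  } guard{argc, argv};
}

int main(int argc, char** argv) {
  init(&argc, &argv);  // explicit call site, ordered by the program itself
  return 0;
}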
namespace caffe { -template -Dtype MultiSolver::ForwardBackwardImpl(bool first, bool last) { +#ifdef CAFFE_PER_LAYER_TIMINGS +#define LAYER_TIMING_START() do { \ + root_solver_->timer.Start(); \ +}while(0) - Dtype loss = 0; +#define LAYER_TIMING_STOP(name, index) do { \ + root_solver_->name##_time_per_layer[index] += root_solver_->timer.MicroSeconds(); \ +}while(0) +#else +#define LAYER_TIMING_START() + +#define LAYER_TIMING_STOP(name,index) +#endif + +template +inline bool MultiSolver::IsSkipWaitGradient(int layer_id) { Net& net = *root_solver_->net(); const std::vector>>& layers{ net.layers() }; const std::vector& layer_need_backward{ net.layer_need_backward() }; -#ifdef CAFFE_PER_LAYER_TIMINGS - Timer& timer = root_solver_->timer; - std::vector& forward_time_per_layer = root_solver_->forward_time_per_layer; - std::vector& backward_time_per_layer = root_solver_->backward_time_per_layer; - std::vector& update_time_per_layer = root_solver_->update_time_per_layer; -#endif /* CAFFE_PER_LAYER_TIMINGS */ + if (!layer_need_backward[layer_id] || ((layers[layer_id]->layerOp != nullptr) + && !layers[layer_id]->layerOp->HasParameterSets())) { + DLOG(INFO) << "ForwardBackwardImpl: no need for apply_updates for layer # " + << layer_id << ", skip on_delwt_wait, apply_updates, on_wtinc_ready"; + return true; + } + return false; +} - net.ClearParamDiffs(); +template +inline void MultiSolver::WaitAndUpdateGradient(int layer_id) { + LAYER_TIMING_START(); + for (int j = 0; j < callbacks_.size(); ++j) { + callbacks_[j]->on_delwt_wait(layer_id); + } + LAYER_TIMING_STOP(waitcomm, layer_id); - for (int i = 0; i < layers.size(); ++i) { -#ifdef CAFFE_PER_LAYER_TIMINGS - timer.Start(); +#ifdef FW_OVERLAP_OPT + if (layer_finished_flags_[layer_id]) { #endif - loss += net.ForwardFromTo(i, i); + LAYER_TIMING_START(); + for (int j = 0; j < callbacks_.size(); ++j) { + callbacks_[j]->apply_updates(layer_id); + } + LAYER_TIMING_STOP(update, layer_id); +#ifdef FW_OVERLAP_OPT + } +#endif +} -#ifdef CAFFE_PER_LAYER_TIMINGS - forward_time_per_layer[i] += timer.MicroSeconds(); +template +Dtype MultiSolver::ForwardBackwardImpl(bool first, bool last) { + Dtype loss = 0; + Net& net = *root_solver_->net(); + const std::vector>>& layers{ net.layers() }; + const std::vector& layer_need_backward{ net.layer_need_backward() }; + + for (int i = 0; i < layers.size(); ++i) { +#ifdef FW_OVERLAP_OPT + if (first && IsSkipWaitGradient(i) == false) { + while (layer_finished_flags_[i] == false) { + WaitAndUpdateGradient(i); + if (layer_finished_flags_[i]) + break; + + for (int k=i+1; k= 0; --i) { -#ifdef CAFFE_PER_LAYER_TIMINGS - timer.Start(); -#endif - if (!layer_need_backward[i]) { continue; } - + + LAYER_TIMING_START(); net.BackwardFromTo(i, i); + LAYER_TIMING_STOP(backward, i); - if (last && (layers[i]->layerOp != nullptr) && layers[i]->layerOp->HasParameterSets()) { + if (last && (layers[i]->layerOp != nullptr) + && layers[i]->layerOp->HasParameterSets()) { + LAYER_TIMING_START(); for (int j = 0; j < callbacks_.size(); ++j) { - callbacks_[j]->on_iter_finished(i); + callbacks_[j]->on_iter_finished(i); } + LAYER_TIMING_STOP(startcomm, i); } - -#ifdef CAFFE_PER_LAYER_TIMINGS - backward_time_per_layer[i] += timer.MicroSeconds(); -#endif } +#ifdef FW_OVERLAP_OPT + int iter = root_solver_->iter(); + int max_iter = root_solver_->param().max_iter(); + bool test = (root_solver_->param().test_interval() + && ((iter + 1) % root_solver_->param().test_interval() == 0)); + if (last && (test || (iter == max_iter - 1))) { + int finished_count = 0; + 
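// A minimal standalone sketch (Stats/ElapsedMicros are illustrative
// stand-ins for the solver's timer fields) of the two idioms the
// LAYER_TIMING_* macros above rely on: do { ... } while (0) makes a
// multi-statement macro expand to exactly one statement, so it is safe
// inside an un-braced if/else, and the ## token paste selects the per-phase
// accumulator vector from the name argument (forward, backward, update,
// startcomm, waitcomm).
#include <cstdio>
#include <vector>

struct Stats {
  std::vector<double> forward_time_per_layer = std::vector<double>(4, 0.0);
  std::vector<double> backward_time_per_layer = std::vector<double>(4, 0.0);
} stats;

static double ElapsedMicros() { return 42.0; }  // stand-in for Timer

#define TIMING_ADD(name, index) do { \
  stats.name##_time_per_layer[index] += ElapsedMicros(); \
} while (0)

int main() {
  if (stats.forward_time_per_layer.size() > 2)
    TIMING_ADD(forward, 2);  // expands to a single statement; no dangling else
  std::printf("%.1f\n", stats.forward_time_per_layer[2]);
  return 0;
}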
while (finished_count < layers.size()) { +#else if (last) { - - for (int i = 0; i < layers.size(); ++i) { -#ifdef CAFFE_PER_LAYER_TIMINGS - timer.Start(); #endif - if (!layer_need_backward[i] || ((layers[i]->layerOp != nullptr) && !layers[i]->layerOp->HasParameterSets())) { - DLOG(INFO) << "ForwardBackwardImpl: no need for apply_updates for layer # " << i - << ", skip on_delwt_wait, apply_updates, on_wtinc_ready"; - continue; - } - - for (int j = 0; j < callbacks_.size(); ++j) { - callbacks_[j]->on_delwt_wait(i); - } + for (int i = 0; i < layers.size(); ++i) { + if (IsSkipWaitGradient(i)) { +#ifdef FW_OVERLAP_OPT + finished_count++; + layer_finished_flags_[i] = true; +#endif + continue; + } +#ifdef FW_OVERLAP_OPT + if (layer_finished_flags_[i]) + continue; +#endif - for (int j = 0; j < callbacks_.size(); ++j) { - callbacks_[j]->apply_updates(i); - } -#ifdef CAFFE_PER_LAYER_TIMINGS - update_time_per_layer[i] += timer.MicroSeconds(); + WaitAndUpdateGradient(i); +#ifdef FW_OVERLAP_OPT + if (layer_finished_flags_[i]) + finished_count++; #endif + } +#ifdef FW_OVERLAP_OPT } +#endif } DLOG(WARNING) << "iter " << root_solver_->iter() << ", loss " << loss; @@ -128,6 +190,7 @@ Dtype MultiSolver::ForwardBackwardImpl(bool first, bool last) { template Dtype MultiSolver::ForwardBackward() { Dtype loss = 0; + root_solver_->net()->ClearParamDiffs(); for (int i = 0; i < iter_size; ++i) { loss += ForwardBackwardImpl( (i == 0), (i + 1 == iter_size)); diff --git a/src/caffe/multinode/multi_sync.cpp b/src/caffe/multinode/multi_sync.cpp index eb6229ed4..448172c7b 100644 --- a/src/caffe/multinode/multi_sync.cpp +++ b/src/caffe/multinode/multi_sync.cpp @@ -53,12 +53,19 @@ MultiSync::MultiSync(shared_ptr > root_solver) root_solver->set_iter(1); layer_param_ids.resize(layers.size()); +#ifdef FW_OVERLAP_OPT + param_ids_finished_flags.resize(layers.size()); +#endif for (int layer_id = 0; layer_id < layers.size(); layer_id++) { shared_ptr > layer = layers[layer_id]; /* cache param ids */ layer_param_ids[layer_id] = net->get_layer_learnable_param_ids(layer_id); +#ifdef FW_OVERLAP_OPT + param_ids_finished_flags[layer_id].resize(layer_param_ids[layer_id].size()); + std::fill(param_ids_finished_flags[layer_id].begin(), param_ids_finished_flags[layer_id].end(), false); +#endif } } diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 19e3dd7e1..a4224f9ba 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -267,6 +267,8 @@ void Net::Init(const NetParameter& in_param) { batch_size = layer_param.memory_data_param().batch_size(); else if (!layer_param.type().compare("WindowData")) batch_size = layer_param.window_data_param().batch_size(); + else if (!layer_param.type().compare("Input")) + batch_size = layer_param.input_param().shape(0).dim(0); if (caffe::TRAIN == param.state().phase()) { LOG(WARNING) << "SetMinibatchSize " << batch_size; @@ -493,7 +495,7 @@ void Net::CompileNet(const NetParameter& param, NetParameter param_temp0; param_temp0.CopyFrom(param); param_temp0.clear_layer(); - RemoveBNScale(param, ¶m_temp0); + RemoveBNScale(param, ¶m_temp0); NetParameter param_temp; // temporary compiled param param_temp.CopyFrom(param_temp0); @@ -616,26 +618,8 @@ void Net::CompilationRuleTwo(const NetParameter& param, // then we can remove ReLU layer // and rename Convolution top blob after deleted ReLU's top // Note: Currently merging of convolution and relu layers is feasible - // only for caffe::TEST phase, as there is no Backward primitive of conv Relu - // If current layer is Convolution of MKLDNN engine.. 
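// A minimal standalone sketch (illustrative names, not the real solver API)
// of why ClearParamDiffs() moved from ForwardBackwardImpl up into
// ForwardBackward above: with iter_size > 1 the solver accumulates gradients
// across several forward/backward passes and applies them once, so the diffs
// must be zeroed once per effective batch rather than once per
// sub-iteration.
#include <cstdio>
#include <vector>

std::vector<double> diffs(3, 0.0);

void ClearParamDiffs() { diffs.assign(diffs.size(), 0.0); }

double ForwardBackwardImpl(bool /*first*/, bool /*last*/) {
  for (double& d : diffs) d += 1.0;  // pretend each pass adds a gradient
  return 0.5;                        // pretend loss
}

double ForwardBackward(int iter_size) {
  double loss = 0;
  ClearParamDiffs();  // once per effective batch -- the patched placement
  for (int i = 0; i < iter_size; ++i)
    loss += ForwardBackwardImpl(i == 0, i + 1 == iter_size);
  return loss / iter_size;
}

int main() {
  double loss = ForwardBackward(4);
  std::printf("loss=%.3f accumulated diff=%.1f\n", loss, diffs[0]);  // 4.0
  return 0;
}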
- /* - //Old Structure: if ((A == TEST) && (B == 0) && ((C == ConvolutionParameter_Engine_MKLDNN) || ((D == ConvolutionParameter_Engine_DEFAULT) && ((E == 0 && F == string::npos)) || ((G == "" && H == 0 && I == string::npos))))) - //New tmp Structure: if ((A == TEST) && (B == 0) && ((C == ConvolutionParameter_Engine_MKLDNN) || (((D == ConvolutionParameter_Engine_DEFAULT) && ((E == 0 && F == string::npos))) || ((G == "" && H == 0 && I == string::npos))))) - //New Structure: if ((A == TEST) && (B == 0) && ((C == ConvolutionParameter_Engine_MKLDNN) || (((D == ConvolutionParameter_Engine_DEFAULT) && (E == 0 && F == string::npos)) || (G == "" && H == 0 && I == string::npos)))) - //Old Structure: - //if ((A == TEST) && - // (B == 0) && - // ((C == ConvolutionParameter_Engine_MKLDNN) - // || ((D == ConvolutionParameter_Engine_DEFAULT) && - // ((E == 0 - // && F == string::npos)) || - // ((G == "" && - // H == 0 && - // I == string::npos))))) - */ - if ((param.state().phase() == TEST) && - (layer_param->type().compare("Convolution") == 0) && + if ((layer_param->type().compare("Convolution") == 0) && ((layer_param->convolution_param().engine() == ConvolutionParameter_Engine_MKLDNN) || (((layer_param->convolution_param().engine() == ConvolutionParameter_Engine_DEFAULT) && (param.engine().compare(0, 6, "MKLDNN") == 0 @@ -652,20 +636,6 @@ void Net::CompilationRuleTwo(const NetParameter& param, // Consumer layer of blob produced by Conv // has to be ReLU layer with one Input Blob - /* - //Old Structure: if ((A == 0) && ((B == ReLUParameter_Engine_MKLDNN) || ((C == ReLUParameter_Engine_DEFAULT) && ((D == 0 && E == string::npos)) || ((F == "" && G == 0 && H == string::npos))))) - //New tmp Structure: if ((A == 0) && ((B == ReLUParameter_Engine_MKLDNN) || (((C == ReLUParameter_Engine_DEFAULT) && ((D == 0 && E == string::npos))) || ((F == "" && G == 0 && H == string::npos))))) - //New Structure: if ((A == 0) && ((B == ReLUParameter_Engine_MKLDNN) || (((C == ReLUParameter_Engine_DEFAULT) && (D == 0 && E == string::npos)) || (F == "" && G == 0 && H == string::npos)))) - //Old Structure: - //if ((A == 0) && - // ((B == ReLUParameter_Engine_MKLDNN) - // || ((C == ReLUParameter_Engine_DEFAULT) && - // ((D == 0 - // && E == string::npos)) || - // ((F == "" && - // G == 0 && - // H == string::npos))))) - */ if ((consumer_layer_param.type().compare("ReLU") == 0) && ((consumer_layer_param.relu_param().engine() == ReLUParameter_Engine_MKLDNN) || (((consumer_layer_param.relu_param().engine() == ReLUParameter_Engine_DEFAULT) && @@ -676,30 +646,43 @@ void Net::CompilationRuleTwo(const NetParameter& param, layer_param->engine().find(":DLA", 6) == string::npos)))) { string& convolution_top_blob_name = const_cast(layer_param->top(0)); - const string& scale_top_blob_name = consumer_layer_param.top(0); - // Mark Consumer layer (its name) as the one marked for dropping - layers_to_drop.insert(consumer_layer_param.name()); - // Replace Convolution top name with ReLU top name - convolution_top_blob_name.resize(scale_top_blob_name.size()); - convolution_top_blob_name.replace(0, - scale_top_blob_name.size(), - scale_top_blob_name); + if(param.state().phase() == TEST) { + const string& scale_top_blob_name = consumer_layer_param.top(0); + // Mark Consumer layer (its name) as the one marked for dropping + layers_to_drop.insert(consumer_layer_param.name()); + + // Replace Convolution top name with ReLU top name + convolution_top_blob_name.resize(scale_top_blob_name.size()); + convolution_top_blob_name.replace(0, + 
scale_top_blob_name.size(), + scale_top_blob_name); + } // set relu flag in convolution layer_param->mutable_convolution_param()->set_relu(true); float negative_slope1 = consumer_layer_param.relu_param().negative_slope(); layer_param->mutable_convolution_param()-> set_negative_slope(negative_slope1); + + if(param.state().phase() == TRAIN) { + if(i+1 < param.layer_size()) { + LayerParameter* relu_layer_param = + (const_cast(param)).mutable_layer(i+1); + relu_layer_param->mutable_relu_param()->set_fuse(true); + } + } } } - if (layers_to_drop.find(layer_param->name()) != layers_to_drop.end()) { - LOG_IF(INFO, Caffe::root_solver()) << "Dropped layer: " - << layer_param->name() << std::endl; - layer_included = false; - // Remove dropped layer from the list of layers to be dropped - layers_to_drop.erase(layers_to_drop.find(layer_param->name())); + if(param.state().phase() == TEST) { + if (layers_to_drop.find(layer_param->name()) != layers_to_drop.end()) { + LOG_IF(INFO, Caffe::root_solver()) << "Dropped layer: " + << layer_param->name() << std::endl; + layer_included = false; + // Remove dropped layer from the list of layers to be dropped + layers_to_drop.erase(layers_to_drop.find(layer_param->name())); + } } if (layer_included) { @@ -763,107 +746,6 @@ void Net::CompilationRuleThree(const NetParameter& param, return; } - -template -void Net::RemoveBNScale(const NetParameter& param, - NetParameter* param_compiled) { - // - In TEST Phase, if we detect sequential layers conv->batch norm ->scale, - // We will merge batch norm and scale layer into conv layer. - if(param.state().phase() != TEST) { - param_compiled->CopyFrom(param); - param_compiled->mutable_compile_net_state()->set_bn_scale_remove(false); - return ; - } - - bool bn_scale_remove = false; - bool is_net_init = param.compile_net_state().is_init(); - std::set layers_to_drop; - for (int i = 0; i < param.layer_size(); ++i) { - LayerParameter *layer_param = (const_cast(param)).mutable_layer(i); - bool layer_included = true; - bool bn_use_global_stats_set = true; - if (layer_param->type().compare("Convolution") == 0) { - std::vector child_layers_params; - GetBlobConsumers(child_layers_params, layer_param->top(0), param, i + 1 < param.layer_size() ? i + 1 : i); - const LayerParameter &child_layer_param = child_layers_params.size() > 0 ? *(child_layers_params[0]) : *layer_param; - // check whether child layer is BatchNorm - if (child_layer_param.type().compare("BatchNorm") == 0) { - BatchNormParameter bn_param = child_layer_param.batch_norm_param(); - if (is_net_init) { - //Testing Network init process - bool bn_use_global_stats = true; - if (bn_param.has_use_global_stats()) { - bn_use_global_stats = bn_param.use_global_stats(); - } - if (!bn_use_global_stats) { - //This bn layer's use_global_stats is set manually! Don't remove it. 
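// A minimal standalone sketch (FuseConvRelu and the parameter structs are
// hypothetical stand-ins for the NetParameter editing done above) of the two
// branches of the Conv+ReLU rule: in TEST phase the ReLU layer can be
// dropped and the Convolution top renamed, because only the fused forward is
// needed; in TRAIN phase ReLU must survive for its Backward, so the
// convolution gets relu=true while the following ReLU gets fuse=true, which
// turns its Forward_cpu into the ShareData pass-through added earlier in
// this patch.
enum Phase { TRAIN, TEST };

struct ConvParams { bool relu = false; float negative_slope = 0.f; };
struct ReluParams { bool fuse = false; float negative_slope = 0.f; };

void FuseConvRelu(Phase phase, ConvParams& conv, ReluParams& relu,
                  bool& drop_relu_layer) {
  conv.relu = true;                          // conv now applies the activation
  conv.negative_slope = relu.negative_slope;
  if (phase == TEST) {
    drop_relu_layer = true;                  // remove ReLU from the compiled net
  } else {
    relu.fuse = true;                        // keep ReLU; Forward becomes a no-op
    drop_relu_layer = false;
  }
}

int main() {
  ConvParams conv; ReluParams relu; bool drop = false;
  FuseConvRelu(TEST, conv, relu, drop);   // inference: drop == true
  FuseConvRelu(TRAIN, conv, relu, drop);  // training:  relu.fuse == true
  return 0;
}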
- //remained_bn_layer_names.push_back(child_layer_param.name()); - param_compiled->mutable_compile_net_state()->add_kept_bn_layers(child_layer_param.name()); - bn_use_global_stats_set = false; - } - } else { - int kept_bn_layers_num = param.compile_net_state().kept_bn_layers_size(); - bool in_kept_list = false; - for (int idx = 0; idx < kept_bn_layers_num; ++idx) { - if (child_layer_param.name().compare(param.compile_net_state().kept_bn_layers(idx)) == 0) { - in_kept_list = true; - break; - } - } - if (in_kept_list) { - bn_use_global_stats_set = false; - } - } - - if (!bn_use_global_stats_set) { - //Even in caffe TEST phase, current batch norm layer has set use_global_stats = false in protxt file, so we won't - //merge this layer into convolution layer. - param_compiled->add_layer()->CopyFrom(*layer_param); - continue; - } - std::vector grandchild_layers_params; - GetBlobConsumers(grandchild_layers_params, child_layer_param.top(0), param, i + 2 < param.layer_size() ? i + 2 : i); - const LayerParameter &grandchild_layer_param = (grandchild_layers_params.size() > 0) ? *(grandchild_layers_params[0]) : child_layer_param; - if (grandchild_layer_param.type().compare("Scale") == 0) { - MergeLayer(*layer_param, grandchild_layer_param); - AdjustConvLayer(*layer_param, child_layer_param, grandchild_layer_param, is_net_init); - if (bn_scale_remove == false) bn_scale_remove = true; - layers_to_drop.insert(child_layer_param.name()); - layers_to_drop.insert(grandchild_layer_param.name()); - } else if (&child_layer_param != &grandchild_layer_param) { - //In fact, conv-->batchnorm can also be optimized. In such case, we check the blob size of batch norm layer - //if is 3, it means current net hasn't used scale layer, this is equivalent to scale layer with all 1 weights and 0 bias - //if is 4 or 5, it means intel caffe compilation rule 1 works here, we can recover the scale layer from batch norm layer - MergeLayer(*layer_param, child_layer_param); - if (!is_net_init) { - shared_ptr scale_layer_param(new LayerParameter()); - RecoverScaleFromBN(child_layer_param, *scale_layer_param, (Dtype)1, (Dtype)0); - AdjustConvLayer(*layer_param, child_layer_param, *scale_layer_param, is_net_init); - } else { - AdjustConvLayer(*layer_param, child_layer_param, grandchild_layer_param, true); - } - if (bn_scale_remove == false) bn_scale_remove = true; - layers_to_drop.insert(child_layer_param.name()); - } - } - } - if (layers_to_drop.find(layer_param->name()) != layers_to_drop.end()) { - LOG_IF(INFO, Caffe::root_solver()) << "Dropped Layer: "<< layer_param->name() << std::endl; - layer_included = false; - // Remove dropped layer from the list of layers to be dropped - layers_to_drop.erase(layers_to_drop.find(layer_param->name())); - } - if (layer_included) { - if (layer_param->type().compare("BatchNorm") == 0) { - param_compiled->mutable_compile_net_state()->add_kept_bn_layers(layer_param->name()); - } - param_compiled->add_layer()->CopyFrom(*layer_param); - } - } - - param_compiled->mutable_compile_net_state()->set_bn_scale_remove(bn_scale_remove); - } - template void Net::GetBlobConsumers( std::vector& consumer_blobs, diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index c4c5228e5..3bf537607 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -645,6 +645,14 @@ message TransformationParameter { optional ExpansionParameter expand_param = 14; // Constraint for emitting the annotation after transformation. 
optional EmitConstraint emit_constraint = 10; + // Resize the input randomly + optional RandomResizeParameter random_resize_param = 15; +} + +message RandomResizeParameter { + optional uint32 min_size = 1 [default = 0]; + optional uint32 max_size = 2 [default = 0]; + optional ResizeParameter resize_param = 3; } // Message that stores parameters used by data transformer for resize policy @@ -1626,6 +1634,7 @@ message ReLUParameter { MKLDNN = 4; } optional Engine engine = 2 [default = DEFAULT]; + optional bool fuse = 3 [default = false]; } message ReshapeParameter { diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index cf8c31b47..3f17c5c58 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -394,10 +394,17 @@ void Solver::InitTimers() { this->forward_time_per_layer.resize(layer_count, 0.0); this->backward_time_per_layer.resize(layer_count, 0.0); this->update_time_per_layer.resize(layer_count, 0.0); - +#ifdef USE_MLSL + this->startcomm_time_per_layer.resize(layer_count, 0.0); + this->waitcomm_time_per_layer.resize(layer_count, 0.0); +#endif this->forward_time_per_layer_total.resize(layer_count, 0.0); this->backward_time_per_layer_total.resize(layer_count, 0.0); this->update_time_per_layer_total.resize(layer_count, 0.0); +#ifdef USE_MLSL + this->startcomm_time_per_layer_total.resize(layer_count, 0.0); + this->waitcomm_time_per_layer_total.resize(layer_count, 0.0); +#endif } template @@ -419,6 +426,19 @@ void Solver::ResetTimers() { this->update_time_per_layer.begin(), this->update_time_per_layer_total.begin(), std::plus()); +#ifdef USE_MLSL + std::transform(this->startcomm_time_per_layer_total.begin(), + this->startcomm_time_per_layer_total.end(), + this->startcomm_time_per_layer.begin(), + this->startcomm_time_per_layer_total.begin(), + std::plus()); + + std::transform(this->waitcomm_time_per_layer_total.begin(), + this->waitcomm_time_per_layer_total.end(), + this->waitcomm_time_per_layer.begin(), + this->waitcomm_time_per_layer_total.begin(), + std::plus()); +#endif std::fill(this->forward_time_per_layer.begin(), this->forward_time_per_layer.end(), 0); @@ -426,6 +446,12 @@ void Solver::ResetTimers() { this->backward_time_per_layer.end(), 0); std::fill(this->update_time_per_layer.begin(), this->update_time_per_layer.end(), 0); +#ifdef USE_MLSL + std::fill(this->startcomm_time_per_layer.begin(), + this->startcomm_time_per_layer.end(), 0); + std::fill(this->waitcomm_time_per_layer.begin(), + this->waitcomm_time_per_layer.end(), 0); +#endif } template @@ -444,7 +470,13 @@ void Solver::PrintTimers(bool printTotal) { backward_time_per_layer_total : backward_time_per_layer; std::vector& update_timers = printTotal ? update_time_per_layer_total : update_time_per_layer; +#ifdef USE_MLSL + std::vector& startcomm_timers = printTotal ? + startcomm_time_per_layer_total : startcomm_time_per_layer; + std::vector& waitcomm_timers = printTotal ? + waitcomm_time_per_layer_total : waitcomm_time_per_layer; std::string prefix = printTotal ? 
"TOTAL " : "DELTA "; +#endif double forward_time = std::accumulate(forward_timers.begin(), forward_timers.end(), 0) / 1000; @@ -479,8 +511,37 @@ void Solver::PrintTimers(bool printTotal) { } LOG(WARNING) << std::endl; - LOG(WARNING) << prefix << "TIME (F+B+U): " << (forward_time + - backward_time + update_time) / 1000 << " sec"; +#ifdef USE_MLSL + double startcomm_time = std::accumulate(startcomm_timers.begin(), + startcomm_timers.end(), 0) / 1000; + LOG(WARNING) << prefix << "START COMMUNICATION TIME: " << startcomm_time << " ms"; + for (int layer_idx = 0; layer_idx < net_->layers().size(); layer_idx++) { + LOG(WARNING) << "LAYER-" << layer_idx << " " + << net_->layers()[layer_idx]->type() + << ": startcomm_time: " << startcomm_timers[layer_idx] / 1000 + << " ms"; + } + LOG(WARNING) << std::endl; + + double waitcomm_time = std::accumulate(waitcomm_timers.begin(), + waitcomm_timers.end(), 0) / 1000; + LOG(WARNING) << prefix << "WAIT COMMUNICATION TIME: " << waitcomm_time << " ms"; + for (int layer_idx = 0; layer_idx < net_->layers().size(); layer_idx++) { + LOG(WARNING) << "LAYER-" << layer_idx << " " + << net_->layers()[layer_idx]->type() + << ": waitcomm_time: " << waitcomm_timers[layer_idx] / 1000 + << " ms"; + } + LOG(WARNING) << std::endl; + + LOG(WARNING) << prefix << "TIME (Computation + Communication): " << (forward_time + + backward_time + update_time + startcomm_time + waitcomm_time) / 1000 + << " sec"; +#else + LOG(WARNING) << prefix << "TIME (Computation): " << (forward_time + + backward_time + update_time) / 1000 << " sec"; +#endif + LOG(WARNING) << "####################################################"; LOG(WARNING) << std::endl; } diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp index ae4d3f03f..5b97a8bfb 100644 --- a/src/caffe/test/test_net.cpp +++ b/src/caffe/test/test_net.cpp @@ -2987,6 +2987,230 @@ class CompileNetTest : public ::testing::Test { } }; +TEST_F(CompileNetTest, TestRemoveBatchNorm1) { + const string& input_proto = + "name: 'TestNetwork' " + "layer { " + " name: 'data' " + " type: 'Data' " + " top: 'data' " + " top: 'label' " + "} " + "layer { " + " bottom: 'data' " + " name: 'conv' " + " top: 'conv' " + " type: 'Convolution' " + "} " + "layer { " + " bottom: 'conv' " + " name: 'bn' " + " top: 'conv' " + " type: 'BatchNorm' " + "} " + "layer { " + " name: 'loss' " + " type: 'SoftmaxWithLoss' " + " bottom: 'conv' " + " bottom: 'label' " + "} "; + + const string& output_proto = + "name: 'TestNetwork' " + "layer { " + " name: 'data' " + " type: 'Data' " + " top: 'data' " + " top: 'label' " + "} " + "layer { " + " bottom: 'data' " + " name: 'conv' " + " top: 'conv' " + " type: 'Convolution' " + "} " + "layer { " + " name: 'loss' " + " type: 'SoftmaxWithLoss' " + " bottom: 'conv' " + " bottom: 'label' " + "} "; + this->RunCompilerNetTest(input_proto, output_proto); +} + +TEST_F(CompileNetTest, TestRemoveBatchNorm2) { + const string& input_proto = + "name: 'TestNetwork' " + "layer { " + " name: 'data' " + " type: 'Data' " + " top: 'data' " + " top: 'label' " + "} " + "layer { " + " bottom: 'data' " + " name: 'fc1' " + " top: 'fc1' " + " type: 'InnerProduct' " + "} " + "layer { " + " bottom: 'fc1' " + " name: 'bn' " + " top: 'bn' " + " type: 'BatchNorm' " + "} " + "layer { " + " name: 'loss' " + " type: 'SoftmaxWithLoss' " + " bottom: 'bn' " + " bottom: 'label' " + "} "; + + const string& output_proto = + "name: 'TestNetwork' " + "layer { " + " name: 'data' " + " type: 'Data' " + " top: 'data' " + " top: 'label' " + "} " + "layer { " + " 
bottom: 'data' " + " name: 'fc1' " + " top: 'fc1' " + " type: 'InnerProduct' " + "} " + "layer { " + " bottom: 'fc1' " + " name: 'bn' " + " top: 'bn' " + " type: 'BatchNorm' " + "} " + "layer { " + " name: 'loss' " + " type: 'SoftmaxWithLoss' " + " bottom: 'bn' " + " bottom: 'label' " + "} "; + this->RunCompilerNetTest(input_proto, output_proto); +} + +TEST_F(CompileNetTest, TestRemoveBatchNorm3) { + const string& input_proto = + "name: 'TestNetwork' " + "layer { " + " name: 'data' " + " type: 'Data' " + " top: 'data' " + " top: 'label' " + "} " + "layer { " + " bottom: 'data' " + " name: 'conv' " + " top: 'conv' " + " type: 'Convolution' " + "} " + "layer { " + " bottom: 'conv' " + " name: 'bn' " + " top: 'conv' " + " type: 'BatchNorm' " + " batch_norm_param { " + " use_global_stats: false" + " }" + "} " + "layer { " + " name: 'loss' " + " type: 'SoftmaxWithLoss' " + " bottom: 'conv' " + " bottom: 'label' " + "} "; + + const string& output_proto = + "name: 'TestNetwork' " + "layer { " + " name: 'data' " + " type: 'Data' " + " top: 'data' " + " top: 'label' " + "} " + "layer { " + " bottom: 'data' " + " name: 'conv' " + " top: 'conv' " + " type: 'Convolution' " + "} " + "layer { " + " bottom: 'conv' " + " name: 'bn' " + " top: 'conv' " + " type: 'BatchNorm' " + " batch_norm_param { " + " use_global_stats: false" + " }" + "} " + "layer { " + " name: 'loss' " + " type: 'SoftmaxWithLoss' " + " bottom: 'conv' " + " bottom: 'label' " + "} "; + this->RunCompilerNetTest(input_proto, output_proto); +} + +TEST_F(CompileNetTest, TestRemoveBatchNorm4) { + const string& input_proto = + "name: 'TestNetwork' " + "layer { " + " name: 'data' " + " type: 'Data' " + " top: 'data' " + " top: 'label' " + "} " + "layer { " + " bottom: 'data' " + " name: 'conv' " + " top: 'conv' " + " type: 'Convolution' " + "} " + "layer { " + " bottom: 'conv' " + " name: 'bn' " + " top: 'conv' " + " type: 'BatchNorm' " + " batch_norm_param { " + " use_global_stats: true" + " }" + "} " + "layer { " + " name: 'loss' " + " type: 'SoftmaxWithLoss' " + " bottom: 'conv' " + " bottom: 'label' " + "} "; + + const string& output_proto = + "name: 'TestNetwork' " + "layer { " + " name: 'data' " + " type: 'Data' " + " top: 'data' " + " top: 'label' " + "} " + "layer { " + " bottom: 'data' " + " name: 'conv' " + " top: 'conv' " + " type: 'Convolution' " + "} " + "layer { " + " name: 'loss' " + " type: 'SoftmaxWithLoss' " + " bottom: 'conv' " + " bottom: 'label' " + "} "; + this->RunCompilerNetTest(input_proto, output_proto); +} #ifdef MKL2017_SUPPORTED // If BatchNorm of engine MKL2017 // produce blob consumed by diff --git a/src/caffe/util/remove_batch_norm.cpp b/src/caffe/util/remove_batch_norm.cpp index 63c9b3f81..8c56639fc 100644 --- a/src/caffe/util/remove_batch_norm.cpp +++ b/src/caffe/util/remove_batch_norm.cpp @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "caffe/blob.hpp" #include "caffe/util/remove_batch_norm.hpp" #include "caffe/util/math_functions.hpp" +#include "caffe/net.hpp" namespace caffe { template @@ -188,6 +189,106 @@ void RecoverBNScaleMergedNet(NetParameter * net_param, NetParameter* recovered_n } } +template +void RemoveBNScale(const NetParameter& param, NetParameter* param_compiled) { + + // - In TEST Phase, if we detect sequential layers conv->batch norm ->scale, + // We will merge batch norm and scale layer into conv layer. 
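// A minimal numeric sketch of the algebra behind the merge described just
// above and implemented in the code that follows, for one output channel
// (scalar weights here; the real AdjustConvLayer applies this per channel).
// At inference BatchNorm computes (x - mean) / sqrt(var + eps) and Scale
// computes gamma * x + beta, so conv -> bn -> scale collapses into a single
// convolution with
//   W' = W * gamma / sqrt(var + eps)
//   b' = (b - mean) * gamma / sqrt(var + eps) + beta
// and a bare conv -> bn is the special case gamma = 1, beta = 0, matching
// the RecoverScaleFromBN(..., (Dtype)1, (Dtype)0) call further down.
#include <cmath>
#include <cstdio>

int main() {
  double W = 0.8, b = 0.1;                   // conv weight and bias
  double mean = 0.4, var = 2.25, eps = 1e-5; // BN statistics
  double gamma = 1.5, beta = -0.2;           // Scale parameters
  double a = gamma / std::sqrt(var + eps);   // common multiplier
  double Wf = W * a, bf = (b - mean) * a + beta;

  double x = 3.0;                            // any input activation
  double ref = gamma * ((W * x + b) - mean) / std::sqrt(var + eps) + beta;
  double fused = Wf * x + bf;
  std::printf("ref=%f fused=%f\n", ref, fused);  // agree up to rounding
  return 0;
}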
+ if(param.state().phase() != TEST) { + param_compiled->CopyFrom(param); + param_compiled->mutable_compile_net_state()->set_bn_scale_remove(false); + return ; + } + + bool bn_scale_remove = false; + bool is_net_init = param.compile_net_state().is_init(); + std::set layers_to_drop; + for (int i = 0; i < param.layer_size(); ++i) { + LayerParameter *layer_param = (const_cast(param)).mutable_layer(i); + bool layer_included = true; + bool bn_use_global_stats_set = true; + if (layer_param->type().compare("Convolution") == 0) { + std::vector child_layers_params; + Net::GetBlobConsumers(child_layers_params, layer_param->top(0), param, i + 1 < param.layer_size() ? i + 1 : i); + const LayerParameter &child_layer_param = child_layers_params.size() > 0 ? *(child_layers_params[0]) : *layer_param; + // check whether child layer is BatchNorm + if (child_layer_param.type().compare("BatchNorm") == 0) { + BatchNormParameter bn_param = child_layer_param.batch_norm_param(); + if (is_net_init) { + //Testing Network init process + bool bn_use_global_stats = true; + if (bn_param.has_use_global_stats()) { + bn_use_global_stats = bn_param.use_global_stats(); + } + if (!bn_use_global_stats) { + //This bn layer's use_global_stats is set manually! Don't remove it. + //remained_bn_layer_names.push_back(child_layer_param.name()); + param_compiled->mutable_compile_net_state()->add_kept_bn_layers(child_layer_param.name()); + bn_use_global_stats_set = false; + } + } else { + int kept_bn_layers_num = param.compile_net_state().kept_bn_layers_size(); + bool in_kept_list = false; + for (int idx = 0; idx < kept_bn_layers_num; ++idx) { + if (child_layer_param.name().compare(param.compile_net_state().kept_bn_layers(idx)) == 0) { + in_kept_list = true; + break; + } + } + if (in_kept_list) { + bn_use_global_stats_set = false; + } + } + + if (!bn_use_global_stats_set) { + //Even in caffe TEST phase, current batch norm layer has set use_global_stats = false in protxt file, so we won't + //merge this layer into convolution layer. + param_compiled->add_layer()->CopyFrom(*layer_param); + continue; + } + std::vector grandchild_layers_params; + Net::GetBlobConsumers(grandchild_layers_params, child_layer_param.top(0), param, i + 2 < param.layer_size() ? i + 2 : i); + const LayerParameter &grandchild_layer_param = (grandchild_layers_params.size() > 0) ? *(grandchild_layers_params[0]) : child_layer_param; + if (grandchild_layer_param.type().compare("Scale") == 0) { + MergeLayer(*layer_param, grandchild_layer_param); + AdjustConvLayer(*layer_param, child_layer_param, grandchild_layer_param, is_net_init); + if (bn_scale_remove == false) bn_scale_remove = true; + layers_to_drop.insert(child_layer_param.name()); + layers_to_drop.insert(grandchild_layer_param.name()); + } else if (&child_layer_param != &grandchild_layer_param) { + //In fact, conv-->batchnorm can also be optimized. 
In such case, we check the blob size of batch norm layer + //if is 3, it means current net hasn't used scale layer, this is equivalent to scale layer with all 1 weights and 0 bias + //if is 4 or 5, it means intel caffe compilation rule 1 works here, we can recover the scale layer from batch norm layer + MergeLayer(*layer_param, child_layer_param); + if (!is_net_init) { + shared_ptr scale_layer_param(new LayerParameter()); + RecoverScaleFromBN(child_layer_param, *scale_layer_param, (Dtype)1, (Dtype)0); + AdjustConvLayer(*layer_param, child_layer_param, *scale_layer_param, is_net_init); + } else { + AdjustConvLayer(*layer_param, child_layer_param, grandchild_layer_param, true); + } + if (bn_scale_remove == false) bn_scale_remove = true; + layers_to_drop.insert(child_layer_param.name()); + } + } + } + if (layers_to_drop.find(layer_param->name()) != layers_to_drop.end()) { + LOG_IF(INFO, Caffe::root_solver()) << "Dropped Layer: "<< layer_param->name() << std::endl; + layer_included = false; + // Remove dropped layer from the list of layers to be dropped + layers_to_drop.erase(layers_to_drop.find(layer_param->name())); + } + if (layer_included) { + if (layer_param->type().compare("BatchNorm") == 0) { + param_compiled->mutable_compile_net_state()->add_kept_bn_layers(layer_param->name()); + } + param_compiled->add_layer()->CopyFrom(*layer_param); + } + } + + param_compiled->mutable_compile_net_state()->set_bn_scale_remove(bn_scale_remove); +} + template void RecoverScaleFromBN(const LayerParameter& bn_layer_param, LayerParameter& scale_layer_param, float default_scale_weights, float default_scale_bias); template void RecoverScaleFromBN(const LayerParameter& bn_layer_param, LayerParameter& scale_layer_param, double default_scale_weights, double default_scale_bias); template void AdjustConvLayer(LayerParameter &conv_layer, @@ -200,4 +301,6 @@ template void AdjustConvLayer(LayerParameter &conv_layer, template void RecoverBNScaleMergedNet(NetParameter * net_param, NetParameter* recovered_net_param); template void RecoverBNScaleMergedNet(NetParameter * net_param, NetParameter* recovered_net_param); +template void RemoveBNScale(const NetParameter& param, NetParameter* param_compiled); +template void RemoveBNScale(const NetParameter& param, NetParameter* param_compiled); } diff --git a/tools/caffe.cpp b/tools/caffe.cpp index 3cd2234f6..231209127 100644 --- a/tools/caffe.cpp +++ b/tools/caffe.cpp @@ -692,6 +692,9 @@ int main(int argc, char** argv) { " compare collects layer data using inputs from other device"); // Run tool or show usage. caffe::GlobalInit(&argc, &argv); +#ifdef USE_MLSL + caffe::mn::init(&argc, &argv); +#endif if (argc == 2) { #ifdef WITH_PYTHON_LAYER try { diff --git a/xbyak/COPYRIGHT b/xbyak/COPYRIGHT new file mode 100644 index 000000000..78d3140b8 --- /dev/null +++ b/xbyak/COPYRIGHT @@ -0,0 +1,47 @@ + +Copyright (c) 2007 MITSUNARI Shigeo +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. +Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. 
+Neither the name of the copyright owner nor the names of its contributors may +be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +THE POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +ソースコード形式かバイナリ形式か、変更するかしないかを問わず、以下の条件を満た +す場合に限り、再頒布および使用が許可されます。 + +ソースコードを再頒布する場合、上記の著作権表示、本条件一覧、および下記免責条項 +を含めること。 +バイナリ形式で再頒布する場合、頒布物に付属のドキュメント等の資料に、上記の著作 +権表示、本条件一覧、および下記免責条項を含めること。 +書面による特別の許可なしに、本ソフトウェアから派生した製品の宣伝または販売促進 +に、著作権者の名前またはコントリビューターの名前を使用してはならない。 +本ソフトウェアは、著作権者およびコントリビューターによって「現状のまま」提供さ +れており、明示黙示を問わず、商業的な使用可能性、および特定の目的に対する適合性 +に関する暗黙の保証も含め、またそれに限定されない、いかなる保証もありません。 +著作権者もコントリビューターも、事由のいかんを問わず、 損害発生の原因いかんを +問わず、かつ責任の根拠が契約であるか厳格責任であるか(過失その他の)不法行為で +あるかを問わず、仮にそのような損害が発生する可能性を知らされていたとしても、 +本ソフトウェアの使用によって発生した(代替品または代用サービスの調達、使用の +喪失、データの喪失、利益の喪失、業務の中断も含め、またそれに限定されない)直接 +損害、間接損害、偶発的な損害、特別損害、懲罰的損害、または結果損害について、 +一切責任を負わないものとします。 diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index d0cf6f9c3..31aa0a056 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -1,3 +1,48 @@ +/******************************************************************************* +* Copyright 2016-2017 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +/******************************************************************************* +* Copyright (c) 2007 MITSUNARI Shigeo +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* Neither the name of the copyright owner nor the names of its contributors may +* be used to endorse or promote products derived from this software without +* specific prior written permission. 
+* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +* THE POSSIBILITY OF SUCH DAMAGE. +*******************************************************************************/ + #pragma once #ifndef XBYAK_XBYAK_H_ #define XBYAK_XBYAK_H_ diff --git a/xbyak/xbyak_bin2hex.h b/xbyak/xbyak_bin2hex.h index 69ecdbfed..54e0d8ff1 100644 --- a/xbyak/xbyak_bin2hex.h +++ b/xbyak/xbyak_bin2hex.h @@ -1,3 +1,48 @@ +/******************************************************************************* +* Copyright 2016-2017 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +/******************************************************************************* +* Copyright (c) 2007 MITSUNARI Shigeo +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* Neither the name of the copyright owner nor the names of its contributors may +* be used to endorse or promote products derived from this software without +* specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +* THE POSSIBILITY OF SUCH DAMAGE. 
+*******************************************************************************/ + enum { B00000000= 0, B00000001= 1, diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index ac5be9600..a781f0c30 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1,3 +1,48 @@ +/******************************************************************************* +* Copyright 2016-2017 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +/******************************************************************************* +* Copyright (c) 2007 MITSUNARI Shigeo +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* Neither the name of the copyright owner nor the names of its contributors may +* be used to endorse or promote products derived from this software without +* specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +* THE POSSIBILITY OF SUCH DAMAGE. +*******************************************************************************/ + const char *getVersionString() const { return "4.87"; } void packssdw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6B); } void packsswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x63); } diff --git a/xbyak/xbyak_util.h b/xbyak/xbyak_util.h index 3a7c2c218..5854a1723 100644 --- a/xbyak/xbyak_util.h +++ b/xbyak/xbyak_util.h @@ -1,3 +1,48 @@ +/******************************************************************************* +* Copyright 2016-2017 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +/******************************************************************************* +* Copyright (c) 2007 MITSUNARI Shigeo +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* +* Redistributions of source code must retain the above copyright notice, this +* list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, +* this list of conditions and the following disclaimer in the documentation +* and/or other materials provided with the distribution. +* Neither the name of the copyright owner nor the names of its contributors may +* be used to endorse or promote products derived from this software without +* specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +* THE POSSIBILITY OF SUCH DAMAGE. +*******************************************************************************/ + #ifndef XBYAK_XBYAK_UTIL_H_ #define XBYAK_XBYAK_UTIL_H_