diff --git a/include/tkDNN/Layer.h b/include/tkDNN/Layer.h index 9154e1b5..9bd84322 100644 --- a/include/tkDNN/Layer.h +++ b/include/tkDNN/Layer.h @@ -50,7 +50,7 @@ class Layer { } void setFinal() { this->final = true; } dataDim_t input_dim, output_dim; - dnnType *dstData; //where results will be putted + dnnType *dstData = nullptr; //where results will be put int id = 0; bool final; //if the layer is the final one @@ -122,7 +122,7 @@ class LayerWgs : public Layer { __half *data16_d = nullptr, *bias16_d = nullptr; __half *bias216_h = nullptr, *bias216_d = nullptr; - __half *power16_h = nullptr; + __half *power16_h = nullptr, *power16_d = nullptr; __half *scales16_h = nullptr, *scales16_d = nullptr; __half *mean16_h = nullptr, *mean16_d = nullptr; __half *variance16_h = nullptr, *variance16_d = nullptr; @@ -164,6 +164,7 @@ class LayerWgs : public Layer { if( scales16_d != nullptr) { cudaFree( scales16_d); scales16_d = nullptr; } if( mean16_d != nullptr) { cudaFree( mean16_d); mean16_d = nullptr; } if(variance16_d != nullptr) { cudaFree(variance16_d); variance16_d = nullptr; } + if( power16_d != nullptr) { cudaFree( power16_d); power16_d = nullptr; } } } }; diff --git a/include/tkDNN/utils.h b/include/tkDNN/utils.h index f9f6ae77..aa73e9ed 100644 --- a/include/tkDNN/utils.h +++ b/include/tkDNN/utils.h @@ -116,5 +116,6 @@ void matrixMulAdd( cublasHandle_t handle, dnnType* srcData, dnnType* dstData, dnnType* add_vector, int dim, dnnType mul); void getMemUsage(double& vm_usage_kb, double& resident_set_kb); +void printCudaMemUsage(); void removePathAndExtension(const std::string &full_string, std::string &name); #endif //UTILS_H diff --git a/src/Layer.cpp b/src/Layer.cpp index 9f04ca54..a355b905 100644 --- a/src/Layer.cpp +++ b/src/Layer.cpp @@ -24,6 +24,11 @@ Layer::~Layer() { checkCUDNN( cudnnDestroyTensorDescriptor(srcTensorDesc) ); checkCUDNN( cudnnDestroyTensorDescriptor(dstTensorDesc) ); + + if(dstData != nullptr) { + cudaFree(dstData); + dstData = nullptr; + } 
} }} \ No newline at end of file diff --git a/src/LayerWgs.cpp b/src/LayerWgs.cpp index 21edf79b..4afb7cce 100644 --- a/src/LayerWgs.cpp +++ b/src/LayerWgs.cpp @@ -80,7 +80,7 @@ LayerWgs::LayerWgs(Network *net, int inputs, int outputs, variance16_h = new __half[b_size]; scales16_h = new __half[b_size]; - //cudaMalloc(&power16_d, b_size*sizeof(__half)); + cudaMalloc(&power16_d, b_size*sizeof(__half)); cudaMalloc(&mean16_d, b_size*sizeof(__half)); cudaMalloc(&variance16_d, b_size*sizeof(__half)); cudaMalloc(&scales16_d, b_size*sizeof(__half)); @@ -91,8 +91,8 @@ LayerWgs::LayerWgs(Network *net, int inputs, int outputs, //init power array of ones cudaMemcpy(tmp_d, power_h, b_size*sizeof(float), cudaMemcpyHostToDevice); - //float2half(tmp_d, power16_d, b_size); - //cudaMemcpy(power16_h, power16_d, b_size*sizeof(__half), cudaMemcpyDeviceToHost); + float2half(tmp_d, power16_d, b_size); + cudaMemcpy(power16_h, power16_d, b_size*sizeof(__half), cudaMemcpyDeviceToHost); //mean array cudaMemcpy(tmp_d, mean_h, b_size*sizeof(float), cudaMemcpyHostToDevice); diff --git a/src/Network.cpp b/src/Network.cpp index 9adcc48c..7fa291fd 100644 --- a/src/Network.cpp +++ b/src/Network.cpp @@ -128,6 +128,7 @@ void Network::print() { } printCenteredTitle("", '=', 60); std::cout<<"\n"; + printCudaMemUsage(); } const char *Network::getNetworkRTName(const char *network_name){ networkName = network_name; diff --git a/src/NetworkRT.cpp b/src/NetworkRT.cpp index 0824eba0..9f53b06c 100644 --- a/src/NetworkRT.cpp +++ b/src/NetworkRT.cpp @@ -134,6 +134,7 @@ NetworkRT::NetworkRT(Network *net, const char *name) { networkRT->markOutput(*input); std::cout<<"Selected maxBatchSize: "<getMaxBatchSize()<<"\n"; + printCudaMemUsage(); std::cout<<"Building tensorRT cuda engine...\n"; #if NV_TENSORRT_MAJOR >= 6 engineRT = builderRT->buildEngineWithConfig(*networkRT, *configRT); diff --git a/src/utils.cpp b/src/utils.cpp index 3bd91194..65030f06 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -197,6 +197,12 @@ 
void getMemUsage(double& vm_usage_kb, double& resident_set_kb){ resident_set_kb = rss * page_size_kb; } +void printCudaMemUsage() { + size_t free, total; + checkCuda( cudaMemGetInfo(&free, &total) ); + std::cout<<"GPU free memory: "<