
Commit 0458f36

release layer wgs and version update
1 parent 2f243f2 commit 0458f36

3 files changed, +61 -32 lines

include/tkDNN/Layer.h
+53 -13
@@ -108,24 +108,64 @@ class LayerWgs : public Layer {
 
     // additional bias for DCN
     bool additional_bias;
-    dnnType *bias2_h, *bias2_d;
+    dnnType *bias2_h = nullptr, *bias2_d = nullptr;
 
     //batchnorm
     bool batchnorm;
-    dnnType *power_h;
-    dnnType *scales_h, *scales_d;
-    dnnType *mean_h, *mean_d;
-    dnnType *variance_h, *variance_d;
+    dnnType *power_h = nullptr;
+    dnnType *scales_h = nullptr, *scales_d = nullptr;
+    dnnType *mean_h = nullptr, *mean_d = nullptr;
+    dnnType *variance_h = nullptr, *variance_d = nullptr;
 
     //fp16
-    __half *data16_h, *bias16_h;
-    __half *data16_d, *bias16_d;
-    __half *bias216_h, *bias216_d;
-
-    __half *power16_h, *power16_d;
-    __half *scales16_h, *scales16_d;
-    __half *mean16_h, *mean16_d;
-    __half *variance16_h, *variance16_d;
+    __half *data16_h = nullptr, *bias16_h = nullptr;
+    __half *data16_d = nullptr, *bias16_d = nullptr;
+    __half *bias216_h = nullptr, *bias216_d = nullptr;
+
+    __half *power16_h = nullptr;
+    __half *scales16_h = nullptr, *scales16_d = nullptr;
+    __half *mean16_h = nullptr, *mean16_d = nullptr;
+    __half *variance16_h = nullptr, *variance16_d = nullptr;
+
+    void releaseHost(bool release32 = true, bool release16 = true) {
+        if(release32) {
+            if( data_h != nullptr) { delete [] data_h; data_h = nullptr; }
+            if( bias_h != nullptr) { delete [] bias_h; bias_h = nullptr; }
+            if( bias2_h != nullptr) { delete [] bias2_h; bias2_h = nullptr; }
+            if( scales_h != nullptr) { delete [] scales_h; scales_h = nullptr; }
+            if( mean_h != nullptr) { delete [] mean_h; mean_h = nullptr; }
+            if(variance_h != nullptr) { delete [] variance_h; variance_h = nullptr; }
+            if( power_h != nullptr) { delete [] power_h; power_h = nullptr; }
+        }
+        if(net->fp16 && release16) {
+            if( data16_h != nullptr) { delete [] data16_h; data16_h = nullptr; }
+            if( bias16_h != nullptr) { delete [] bias16_h; bias16_h = nullptr; }
+            if( bias216_h != nullptr) { delete [] bias216_h; bias216_h = nullptr; }
+            if( scales16_h != nullptr) { delete [] scales16_h; scales16_h = nullptr; }
+            if( mean16_h != nullptr) { delete [] mean16_h; mean16_h = nullptr; }
+            if(variance16_h != nullptr) { delete [] variance16_h; variance16_h = nullptr; }
+            if( power16_h != nullptr) { delete [] power16_h; power16_h = nullptr; }
+
+        }
+    }
+    void releaseDevice(bool release32 = true, bool release16 = true) {
+        if(release32) {
+            if( data_d != nullptr) { cudaFree( data_d); data_d = nullptr; }
+            if( bias_d != nullptr) { cudaFree( bias_d); bias_d = nullptr; }
+            if( bias2_d != nullptr) { cudaFree( bias2_d); bias2_d = nullptr; }
+            if( scales_d != nullptr) { cudaFree( scales_d); scales_d = nullptr; }
+            if( mean_d != nullptr) { cudaFree( mean_d); mean_d = nullptr; }
+            if(variance_d != nullptr) { cudaFree(variance_d); variance_d = nullptr; }
+        }
+        if(net->fp16 && release16) {
+            if( data16_d != nullptr) { cudaFree( data16_d); data16_d = nullptr; }
+            if( bias16_d != nullptr) { cudaFree( bias16_d); bias16_d = nullptr; }
+            if( bias216_d != nullptr) { cudaFree( bias216_d); bias216_d = nullptr; }
+            if( scales16_d != nullptr) { cudaFree( scales16_d); scales16_d = nullptr; }
+            if( mean16_d != nullptr) { cudaFree( mean16_d); mean16_d = nullptr; }
+            if(variance16_d != nullptr) { cudaFree(variance16_d); variance16_d = nullptr; }
+        }
+    }
 };
 

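Note on the new helpers: every free above is guarded by a null check and the pointer is reset afterwards, so releaseHost() and releaseDevice() are idempotent and can run twice, or after a manual early release, without double-freeing. This is also why each member now carries an = nullptr initializer, since a layer without batchnorm or fp16 never allocates those buffers. A minimal sketch of the same pattern outside the class, assuming only the CUDA runtime (releaseBuffers and the buffer names are illustrative, not tkDNN API):

    #include <cuda_runtime.h>

    // Guarded, idempotent cleanup: free once, null out the pointer, and any
    // later call falls through the null checks as a harmless no-op.
    void releaseBuffers(float*& host_buf, float*& dev_buf) {
        if(host_buf != nullptr) { delete [] host_buf; host_buf = nullptr; }
        if(dev_buf  != nullptr) { cudaFree(dev_buf);  dev_buf  = nullptr; }
    }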

include/tkDNN/tkdnn.h
+1 -1

@@ -5,4 +5,4 @@
 #include "Layer.h"
 #include "NetworkRT.h"
 
-#define TKDNN_VERSION 400
+#define TKDNN_VERSION 500
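Bumping TKDNN_VERSION from 400 to 500 gives dependent code a compile-time handle on the API change above. A sketch of such a guard, assuming the layer lives in the tk::dnn namespace as suggested by the closing }} in LayerWgs.cpp (freeHostWeights itself is a hypothetical helper, not tkDNN API):

    #include <tkDNN/tkdnn.h>

    // Only call the new release helper when building against this release or
    // newer; older headers (TKDNN_VERSION 400 and below) do not declare it.
    void freeHostWeights(tk::dnn::LayerWgs *l) {
    #if TKDNN_VERSION >= 500
        l->releaseHost();
    #endif
    }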

src/LayerWgs.cpp
+7 -18

@@ -80,7 +80,7 @@ LayerWgs::LayerWgs(Network *net, int inputs, int outputs,
         variance16_h = new __half[b_size];
         scales16_h = new __half[b_size];
 
-        cudaMalloc(&power16_d, b_size*sizeof(__half));
+        //cudaMalloc(&power16_d, b_size*sizeof(__half));
         cudaMalloc(&mean16_d, b_size*sizeof(__half));
         cudaMalloc(&variance16_d, b_size*sizeof(__half));
         cudaMalloc(&scales16_d, b_size*sizeof(__half));

@@ -91,11 +91,10 @@ LayerWgs::LayerWgs(Network *net, int inputs, int outputs,
 
         //init power array of ones
         cudaMemcpy(tmp_d, power_h, b_size*sizeof(float), cudaMemcpyHostToDevice);
-        float2half(tmp_d, power16_d, b_size);
-        cudaMemcpy(power16_h, power16_d, b_size*sizeof(__half), cudaMemcpyDeviceToHost);
+        //float2half(tmp_d, power16_d, b_size);
+        //cudaMemcpy(power16_h, power16_d, b_size*sizeof(__half), cudaMemcpyDeviceToHost);
 
         //mean array
-
         cudaMemcpy(tmp_d, mean_h, b_size*sizeof(float), cudaMemcpyHostToDevice);
         float2half(tmp_d, mean16_d, b_size);
         cudaMemcpy(mean16_h, mean16_d, b_size*sizeof(__half), cudaMemcpyDeviceToHost);

@@ -109,24 +108,14 @@ LayerWgs::LayerWgs(Network *net, int inputs, int outputs,
         //conver scales
         float2half(scales_d, scales16_d, b_size);
         cudaMemcpy(scales16_h, scales16_d, b_size*sizeof(__half), cudaMemcpyDeviceToHost);
+
+        cudaFree(tmp_d);
     }
 }
 
 LayerWgs::~LayerWgs() {
-
-    delete [] data_h;
-    delete [] bias_h;
-    checkCuda( cudaFree(data_d) );
-    checkCuda( cudaFree(bias_d) );
-
-    if(batchnorm) {
-        delete [] scales_h;
-        delete [] mean_h;
-        delete [] variance_h;
-        checkCuda( cudaFree(scales_d) );
-        checkCuda( cudaFree(mean_d) );
-        checkCuda( cudaFree(variance_d) );
-    }
+    releaseHost();
+    releaseDevice();
 }
 
 }}
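With the destructor reduced to releaseHost(); releaseDevice();, the boolean flags let callers drop buffers selectively before destruction. A hypothetical use, assuming the weights have already been consumed (for example serialized into a TensorRT engine):

    // Free only the host-side copies; device buffers stay alive for inference.
    // The destructor's own releaseHost() call later becomes a no-op thanks to
    // the nullptr guards. dropHostCopies is an illustrative helper, not tkDNN API.
    void dropHostCopies(tk::dnn::LayerWgs *l) {
        l->releaseHost(/*release32=*/true, /*release16=*/true);
    }

Two side effects visible in the diff itself: releaseDevice() calls cudaFree without the checkCuda wrapper the old destructor used, and the fp16 buffers (plus bias2 and power_h) are now freed, where the old destructor appears to have leaked them.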
