Skip to content

Commit cf42e7c

Browse files
committed
deconvolutiondepthwise pack4 arm neon
1 parent b4c388a commit cf42e7c

File tree

6 files changed

+814
-97
lines changed

6 files changed

+814
-97
lines changed

src/layer/arm/convolutiondepthwise_arm.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -580,7 +580,7 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con
580580
Mat top_blob_unpacked = top_blob;
581581
if (num_output_g % 4 != 0 && out_elempack == 4)
582582
{
583-
top_blob_unpacked.create(outw, outh, num_output, elemsize / elempack, 1, opt.workspace_allocator);
583+
top_blob_unpacked.create(outw, outh, num_output, out_elemsize / out_elempack, 1, opt.workspace_allocator);
584584
if (top_blob_unpacked.empty())
585585
return -100;
586586
}

src/layer/arm/deconvolution_arm.cpp

Lines changed: 0 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -308,25 +308,6 @@ int Deconvolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Opti
308308

309309
const int maxk = kernel_w * kernel_h;
310310

311-
// kernel offsets
312-
std::vector<int> _space_ofs(maxk);
313-
int* space_ofs = &_space_ofs[0];
314-
{
315-
int p1 = 0;
316-
int p2 = 0;
317-
int gap = outw * dilation_h - kernel_w * dilation_w;
318-
for (int i = 0; i < kernel_h; i++)
319-
{
320-
for (int j = 0; j < kernel_w; j++)
321-
{
322-
space_ofs[p1] = p2;
323-
p1++;
324-
p2 += dilation_w;
325-
}
326-
p2 += gap;
327-
}
328-
}
329-
330311
if (elempack == 4 && out_elempack == 4)
331312
{
332313
// num_output

0 commit comments

Comments
 (0)