jalola
diff --git a/src/command.cpp b/src/command.cpp
Lines changed: 2 additions & 2 deletions
diff --git a/src/layer/arm/absval_arm.cpp b/src/layer/arm/absval_arm.cpp
Lines changed: 2 additions & 2 deletions
diff --git a/src/layer/arm/batchnorm_arm.cpp b/src/layer/arm/batchnorm_arm.cpp
Lines changed: 2 additions & 2 deletions
diff --git a/src/layer/arm/clip_arm.cpp b/src/layer/arm/clip_arm.cpp
Lines changed: 2 additions & 2 deletions
diff --git a/src/layer/arm/convolution_arm.cpp b/src/layer/arm/convolution_arm.cpp
Lines changed: 9 additions & 9 deletions
diff --git a/src/layer/arm/convolutiondepthwise_arm.cpp b/src/layer/arm/convolutiondepthwise_arm.cpp
Lines changed: 12 additions & 12 deletions
diff --git a/src/layer/arm/innerproduct_arm.cpp b/src/layer/arm/innerproduct_arm.cpp
Lines changed: 9 additions & 9 deletions
diff --git a/src/layer/arm/packing_arm.cpp b/src/layer/arm/packing_arm.cpp
Lines changed: 17 additions & 17 deletions
diff --git a/src/layer/arm/padding_arm.cpp b/src/layer/arm/padding_arm.cpp
Lines changed: 2 additions & 2 deletions
diff --git a/src/layer/arm/relu_arm.cpp b/src/layer/arm/relu_arm.cpp
Lines changed: 2 additions & 2 deletions
@@ -847,9 +847,9 @@ VkTransfer::~VkTransfer()
 
 void VkTransfer::record_upload(const Mat& src, VkMat& dst, const Option& opt)
 {
-    if (src.elemsize / src.packing == 4)
+    if (src.elemsize / src.elempack == 4)
     {
-        if (opt.use_fp16_storage || (opt.use_fp16_packed && src.packing % 4 == 0))
+        if (opt.use_fp16_storage || (opt.use_fp16_packed && src.elempack % 4 == 0))
         {
             Mat src_fp16;
             cast_float32_to_float16(src, src_fp16);
 
@@ -35,10 +35,10 @@ int AbsVal_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
     int h = bottom_top_blob.h;
     int channels = bottom_top_blob.c;
     int size = w * h;
-    int packing = bottom_top_blob.packing;
+    int elempack = bottom_top_blob.elempack;
 
 #if __ARM_NEON
-    if (packing == 4)
+    if (elempack == 4)
     {
         #pragma omp parallel for num_threads(opt.num_threads)
         for (int q=0; q<channels; q++)
 
@@ -42,10 +42,10 @@ int BatchNorm_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) cons
     int w = bottom_top_blob.w;
     int h = bottom_top_blob.h;
     int size = w * h;
-    int packing = bottom_top_blob.packing;
+    int elempack = bottom_top_blob.elempack;
 
 #if __ARM_NEON
-    if (packing == 4)
+    if (elempack == 4)
     {
         const float* a_data_ptr = a_data;
         const float* b_data_ptr = b_data;
 
@@ -35,10 +35,10 @@ int Clip_arm::forward_inplace(Mat &bottom_top_blob, const Option &opt) const
     int h = bottom_top_blob.h;
     int channels = bottom_top_blob.c;
     int size = w * h;
-    int packing = bottom_top_blob.packing;
+    int elempack = bottom_top_blob.elempack;
 
 #if __ARM_NEON
-    if (packing == 4)
+    if (elempack == 4)
     {
         #pragma omp parallel for num_threads(opt.num_threads)
         for (int q=0; q<channels; q++)
 
@@ -471,7 +471,7 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option
     int h = bottom_blob.h;
     int channels = bottom_blob.c;
     size_t elemsize = bottom_blob.elemsize;
-    int packing = bottom_blob.packing;
+    int elempack = bottom_blob.elempack;
 
 //     fprintf(stderr, "Convolution input %d x %d  pad = %d %d  ksize=%d %d  stride=%d %d\n", w, h, pad_w, pad_h, kernel_w, kernel_h, stride_w, stride_h);
 
@@ -505,8 +505,8 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option
 
     int outw = (w - kernel_extent_w) / stride_w + 1;
     int outh = (h - kernel_extent_h) / stride_h + 1;
-    int out_packing = num_output % 4 == 0 ? 4 : 1;
-    size_t out_elemsize = elemsize / packing * out_packing;
+    int out_elempack = num_output % 4 == 0 ? 4 : 1;
+    size_t out_elemsize = elemsize / elempack * out_elempack;
 
     const int maxk = kernel_w * kernel_h;
 
@@ -530,15 +530,15 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option
     }
 
     // float32
-    top_blob.create(outw, outh, num_output / out_packing, out_elemsize, out_packing, opt.blob_allocator);
+    top_blob.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_allocator);
     if (top_blob.empty())
         return -100;
 
-    if (packing == 4 && out_packing == 4)
+    if (elempack == 4 && out_elempack == 4)
     {
         // num_output
         #pragma omp parallel for num_threads(opt.num_threads)
-        for (int p=0; p<num_output / out_packing; p++)
+        for (int p=0; p<num_output / out_elempack; p++)
         {
             float* outptr = top_blob.channel(p);
 
@@ -628,11 +628,11 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option
         return 0;
     }
 
-    if (packing == 1 && out_packing == 4)
+    if (elempack == 1 && out_elempack == 4)
     {
         // num_output
         #pragma omp parallel for num_threads(opt.num_threads)
-        for (int p=0; p<num_output / out_packing; p++)
+        for (int p=0; p<num_output / out_elempack; p++)
         {
             float* outptr = top_blob.channel(p);
 
@@ -707,7 +707,7 @@ int Convolution_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option
         return 0;
     }
 
-    if (packing == 4 && out_packing == 1)
+    if (elempack == 4 && out_elempack == 1)
     {
         // num_output
         #pragma omp parallel for num_threads(opt.num_threads)
 
@@ -397,7 +397,7 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con
     int h = bottom_blob.h;
     int channels = bottom_blob.c;
     size_t elemsize = bottom_blob.elemsize;
-    int packing = bottom_blob.packing;
+    int elempack = bottom_blob.elempack;
 
     const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1;
     const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1;
@@ -455,8 +455,8 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con
 
     int outw = (w - kernel_extent_w) / stride_w + 1;
     int outh = (h - kernel_extent_h) / stride_h + 1;
-    int out_packing = num_output % 4 == 0 ? 4 : 1;
-    size_t out_elemsize = elemsize / packing * out_packing;
+    int out_elempack = num_output % 4 == 0 ? 4 : 1;
+    size_t out_elemsize = elemsize / elempack * out_elempack;
 
     if (opt.use_packing_layout)
     {
@@ -482,17 +482,17 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con
         }
     }
 
-    top_blob.create(outw, outh, num_output / out_packing, out_elemsize, out_packing, opt.blob_allocator);
+    top_blob.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_allocator);
     if (top_blob.empty())
         return -100;
 
     // depth-wise
-    if (channels == group / packing && group / packing == num_output / packing)
+    if (channels == group / elempack && group / elempack == num_output / elempack)
     {
-    if (packing == 4)
+    if (elempack == 4)
     {
         #pragma omp parallel for num_threads(opt.num_threads)
-        for (int g=0; g<group / packing; g++)
+        for (int g=0; g<group / elempack; g++)
         {
             float* outptr = top_blob.channel(g);
             const float* kptr = (const float*)weight_data_pack4 + maxk * g * 4;
@@ -561,20 +561,20 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con
     }
     }
 
-    const int channels_g = channels * packing / group;
+    const int channels_g = channels * elempack / group;
     const int num_output_g = num_output / group;
 
     // unpacking
     Mat bottom_blob_bordered_unpacked = bottom_blob_bordered;
-    if (packing == 4 && channels_g % 4 != 0)
+    if (elempack == 4 && channels_g % 4 != 0)
     {
         convert_packing(bottom_blob_bordered, bottom_blob_bordered_unpacked, 1, opt.workspace_allocator, opt.num_threads);
     }
 
     Mat top_blob_unpacked = top_blob;
-    if (num_output_g % 4 != 0 && out_packing == 4)
+    if (num_output_g % 4 != 0 && out_elempack == 4)
     {
-        top_blob_unpacked.create(outw, outh, num_output, elemsize / packing, 1, opt.workspace_allocator);
+        top_blob_unpacked.create(outw, outh, num_output, elemsize / elempack, 1, opt.workspace_allocator);
         if (top_blob_unpacked.empty())
             return -100;
     }
@@ -843,7 +843,7 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con
     }
 
     // packing
-    if (num_output_g % 4 != 0 && out_packing == 4)
+    if (num_output_g % 4 != 0 && out_elempack == 4)
     {
         convert_packing(top_blob_unpacked, top_blob, 4, opt.blob_allocator, opt.num_threads);
     }
 
@@ -160,26 +160,26 @@ int InnerProduct_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Optio
     int h = bottom_blob.h;
     int channels = bottom_blob.c;
     size_t elemsize = bottom_blob.elemsize;
-    int packing = bottom_blob.packing;
+    int elempack = bottom_blob.elempack;
     int size = w * h;
 
     if (opt.use_packing_layout)
     {
 
     int num_input = bottom_blob.w;
 
-    int out_packing = num_output % 4 == 0 ? 4 : 1;
-    size_t out_elemsize = elemsize / packing * out_packing;
+    int out_elempack = num_output % 4 == 0 ? 4 : 1;
+    size_t out_elemsize = elemsize / elempack * out_elempack;
 
-    top_blob.create(num_output / out_packing, out_elemsize, out_packing, opt.blob_allocator);
+    top_blob.create(num_output / out_elempack, out_elemsize, out_elempack, opt.blob_allocator);
     if (top_blob.empty())
         return -100;
 
-    if (packing == 4 && out_packing == 4)
+    if (elempack == 4 && out_elempack == 4)
     {
         // num_output
         #pragma omp parallel for num_threads(opt.num_threads)
-        for (int p=0; p<num_output / out_packing; p++)
+        for (int p=0; p<num_output / out_elempack; p++)
         {
             const float* w = (const float*)weight_data_pack4 + num_input * p * 16;
             const float* m = bottom_blob;
@@ -256,11 +256,11 @@ int InnerProduct_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Optio
         return 0;
     }
 
-    if (packing == 1 && out_packing == 4)
+    if (elempack == 1 && out_elempack == 4)
     {
         // num_output
         #pragma omp parallel for num_threads(opt.num_threads)
-        for (int p=0; p<num_output / out_packing; p++)
+        for (int p=0; p<num_output / out_elempack; p++)
         {
             const float* w = (const float*)weight_data_pack1to4 + num_input * p * 4;
             const float* m = bottom_blob;
@@ -321,7 +321,7 @@ int InnerProduct_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Optio
         return 0;
     }
 
-    if (packing == 4 && out_packing == 1)
+    if (elempack == 4 && out_elempack == 1)
     {
         // num_output
         #pragma omp parallel for num_threads(opt.num_threads)
 
@@ -34,16 +34,16 @@ int Packing_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op
         return Packing::forward(bottom_blob, top_blob, opt);
     }
 
-    int packing = bottom_blob.packing;
+    int elempack = bottom_blob.elempack;
 
-    if (packing == out_packing)
+    if (elempack == out_elempack)
     {
         top_blob = bottom_blob;
         return 0;
     }
 
-    bool pack1to4 = packing == 1 && out_packing == 4;
-    bool pack4to1 = packing == 4 && out_packing == 1;
+    bool pack1to4 = elempack == 1 && out_elempack == 4;
+    bool pack4to1 = elempack == 4 && out_elempack == 1;
 
     if (!pack1to4 && !pack4to1)
     {
@@ -59,17 +59,17 @@ int Packing_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op
     if (!use_padding)
     {
         // identity if use_padding not allowed
-        if (dims == 1 && w * packing % out_packing != 0)
+        if (dims == 1 && w * elempack % out_elempack != 0)
         {
             top_blob = bottom_blob;
             return 0;
         }
-        if (dims == 2 && h * packing % out_packing != 0)
+        if (dims == 2 && h * elempack % out_elempack != 0)
         {
             top_blob = bottom_blob;
             return 0;
         }
-        if (dims == 3 && channels * packing % out_packing != 0)
+        if (dims == 3 && channels * elempack % out_elempack != 0)
         {
             top_blob = bottom_blob;
             return 0;
@@ -79,19 +79,19 @@ int Packing_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op
     if (dims == 1)
     {
         top_blob = bottom_blob;
-        top_blob.w = w * packing / out_packing;
-        top_blob.cstep = w * packing / out_packing;
-        top_blob.elemsize = elemsize / packing * out_packing;
-        top_blob.packing = out_packing;
+        top_blob.w = w * elempack / out_elempack;
+        top_blob.cstep = w * elempack / out_elempack;
+        top_blob.elemsize = elemsize / elempack * out_elempack;
+        top_blob.elempack = out_elempack;
         return 0;
     }
 
     if (dims == 2)
     {
-        int outh = h * packing / out_packing;
-        size_t out_elemsize = elemsize / packing * out_packing;
+        int outh = h * elempack / out_elempack;
+        size_t out_elemsize = elemsize / elempack * out_elempack;
 
-        top_blob.create(w, outh, out_elemsize, out_packing, opt.blob_allocator);
+        top_blob.create(w, outh, out_elemsize, out_elempack, opt.blob_allocator);
         if (top_blob.empty())
             return -100;
 
@@ -195,10 +195,10 @@ int Packing_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op
     if (dims == 3)
     {
         int size = w * h;
-        int outc = channels * packing / out_packing;
-        size_t out_elemsize = elemsize / packing * out_packing;
+        int outc = channels * elempack / out_elempack;
+        size_t out_elemsize = elemsize / elempack * out_elempack;
 
-        top_blob.create(w, h, outc, out_elemsize, out_packing, opt.blob_allocator);
+        top_blob.create(w, h, outc, out_elemsize, out_elempack, opt.blob_allocator);
         if (top_blob.empty())
             return -100;
 
 
@@ -162,10 +162,10 @@ int Padding_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op
     int channels = bottom_blob.c;
     int dims = bottom_blob.dims;
     size_t elemsize = bottom_blob.elemsize;
-    int packing = bottom_blob.packing;
+    int elempack = bottom_blob.elempack;
 
 #if __ARM_NEON
-    if (packing == 4)
+    if (elempack == 4)
     {
         int outw = w + left + right;
 
 
@@ -119,10 +119,10 @@ int ReLU_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
     int h = bottom_top_blob.h;
     int channels = bottom_top_blob.c;
     int size = w * h;
-    int packing = bottom_top_blob.packing;
+    int elempack = bottom_top_blob.elempack;
 
 #if __ARM_NEON
-    if (packing == 4)
+    if (elempack == 4)
     {
         if (slope == 0.f)
         {
Original file line number	Diff line number	Diff line change
`@@ -847,9 +847,9 @@ VkTransfer::~VkTransfer()`
`847`	`847`
`848`	`848`	`void VkTransfer::record_upload(const Mat& src, VkMat& dst, const Option& opt)`
`849`	`849`	`{`
`850`		`- if (src.elemsize / src.packing == 4)`
	`850`	`+ if (src.elemsize / src.elempack == 4)`
`851`	`851`	`{`
`852`		`- if (opt.use_fp16_storage \|\| (opt.use_fp16_packed && src.packing % 4 == 0))`
	`852`	`+ if (opt.use_fp16_storage \|\| (opt.use_fp16_packed && src.elempack % 4 == 0))`
`853`	`853`	`{`
`854`	`854`	`Mat src_fp16;`
`855`	`855`	`cast_float32_to_float16(src, src_fp16);`
Original file line number	Diff line number	Diff line change
`@@ -397,7 +397,7 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con`
`397`	`397`	`int h = bottom_blob.h;`
`398`	`398`	`int channels = bottom_blob.c;`
`399`	`399`	`size_t elemsize = bottom_blob.elemsize;`
`400`		`- int packing = bottom_blob.packing;`
	`400`	`+ int elempack = bottom_blob.elempack;`
`401`	`401`
`402`	`402`	`const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1;`
`403`	`403`	`const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1;`
`@@ -455,8 +455,8 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con`
`455`	`455`
`456`	`456`	`int outw = (w - kernel_extent_w) / stride_w + 1;`
`457`	`457`	`int outh = (h - kernel_extent_h) / stride_h + 1;`
`458`		`- int out_packing = num_output % 4 == 0 ? 4 : 1;`
`459`		`- size_t out_elemsize = elemsize / packing * out_packing;`
	`458`	`+ int out_elempack = num_output % 4 == 0 ? 4 : 1;`
	`459`	`+ size_t out_elemsize = elemsize / elempack * out_elempack;`
`460`	`460`
`461`	`461`	`if (opt.use_packing_layout)`
`462`	`462`	`{`
`@@ -482,17 +482,17 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con`
`482`	`482`	`}`
`483`	`483`	`}`
`484`	`484`
`485`		`- top_blob.create(outw, outh, num_output / out_packing, out_elemsize, out_packing, opt.blob_allocator);`
	`485`	`+ top_blob.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_allocator);`
`486`	`486`	`if (top_blob.empty())`
`487`	`487`	`return -100;`
`488`	`488`
`489`	`489`	`// depth-wise`
`490`		`- if (channels == group / packing && group / packing == num_output / packing)`
	`490`	`+ if (channels == group / elempack && group / elempack == num_output / elempack)`
`491`	`491`	`{`
`492`		`- if (packing == 4)`
	`492`	`+ if (elempack == 4)`
`493`	`493`	`{`
`494`	`494`	`#pragma omp parallel for num_threads(opt.num_threads)`
`495`		`- for (int g=0; g<group / packing; g++)`
	`495`	`+ for (int g=0; g<group / elempack; g++)`
`496`	`496`	`{`
`497`	`497`	`float* outptr = top_blob.channel(g);`
`498`	`498`	`const float* kptr = (const float*)weight_data_pack4 + maxk * g * 4;`
`@@ -561,20 +561,20 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con`
`561`	`561`	`}`
`562`	`562`	`}`
`563`	`563`
`564`		`- const int channels_g = channels * packing / group;`
	`564`	`+ const int channels_g = channels * elempack / group;`
`565`	`565`	`const int num_output_g = num_output / group;`
`566`	`566`
`567`	`567`	`// unpacking`
`568`	`568`	`Mat bottom_blob_bordered_unpacked = bottom_blob_bordered;`
`569`		`- if (packing == 4 && channels_g % 4 != 0)`
	`569`	`+ if (elempack == 4 && channels_g % 4 != 0)`
`570`	`570`	`{`
`571`	`571`	`convert_packing(bottom_blob_bordered, bottom_blob_bordered_unpacked, 1, opt.workspace_allocator, opt.num_threads);`
`572`	`572`	`}`
`573`	`573`
`574`	`574`	`Mat top_blob_unpacked = top_blob;`
`575`		`- if (num_output_g % 4 != 0 && out_packing == 4)`
	`575`	`+ if (num_output_g % 4 != 0 && out_elempack == 4)`
`576`	`576`	`{`
`577`		`- top_blob_unpacked.create(outw, outh, num_output, elemsize / packing, 1, opt.workspace_allocator);`
	`577`	`+ top_blob_unpacked.create(outw, outh, num_output, elemsize / elempack, 1, opt.workspace_allocator);`
`578`	`578`	`if (top_blob_unpacked.empty())`
`579`	`579`	`return -100;`
`580`	`580`	`}`
`@@ -843,7 +843,7 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con`
`843`	`843`	`}`
`844`	`844`
`845`	`845`	`// packing`
`846`		`- if (num_output_g % 4 != 0 && out_packing == 4)`
	`846`	`+ if (num_output_g % 4 != 0 && out_elempack == 4)`
`847`	`847`	`{`
`848`	`848`	`convert_packing(top_blob_unpacked, top_blob, 4, opt.blob_allocator, opt.num_threads);`
`849`	`849`	`}`
Original file line number	Diff line number	Diff line change
`@@ -34,16 +34,16 @@ int Packing_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op`
`34`	`34`	`return Packing::forward(bottom_blob, top_blob, opt);`
`35`	`35`	`}`
`36`	`36`
`37`		`- int packing = bottom_blob.packing;`
	`37`	`+ int elempack = bottom_blob.elempack;`
`38`	`38`
`39`		`- if (packing == out_packing)`
	`39`	`+ if (elempack == out_elempack)`
`40`	`40`	`{`
`41`	`41`	`top_blob = bottom_blob;`
`42`	`42`	`return 0;`
`43`	`43`	`}`
`44`	`44`
`45`		`- bool pack1to4 = packing == 1 && out_packing == 4;`
`46`		`- bool pack4to1 = packing == 4 && out_packing == 1;`
	`45`	`+ bool pack1to4 = elempack == 1 && out_elempack == 4;`
	`46`	`+ bool pack4to1 = elempack == 4 && out_elempack == 1;`
`47`	`47`
`48`	`48`	`if (!pack1to4 && !pack4to1)`
`49`	`49`	`{`
`@@ -59,17 +59,17 @@ int Packing_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op`
`59`	`59`	`if (!use_padding)`
`60`	`60`	`{`
`61`	`61`	`// identity if use_padding not allowed`
`62`		`- if (dims == 1 && w * packing % out_packing != 0)`
	`62`	`+ if (dims == 1 && w * elempack % out_elempack != 0)`
`63`	`63`	`{`
`64`	`64`	`top_blob = bottom_blob;`
`65`	`65`	`return 0;`
`66`	`66`	`}`
`67`		`- if (dims == 2 && h * packing % out_packing != 0)`
	`67`	`+ if (dims == 2 && h * elempack % out_elempack != 0)`
`68`	`68`	`{`
`69`	`69`	`top_blob = bottom_blob;`
`70`	`70`	`return 0;`
`71`	`71`	`}`
`72`		`- if (dims == 3 && channels * packing % out_packing != 0)`
	`72`	`+ if (dims == 3 && channels * elempack % out_elempack != 0)`
`73`	`73`	`{`
`74`	`74`	`top_blob = bottom_blob;`
`75`	`75`	`return 0;`
`@@ -79,19 +79,19 @@ int Packing_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op`
`79`	`79`	`if (dims == 1)`
`80`	`80`	`{`
`81`	`81`	`top_blob = bottom_blob;`
`82`		`- top_blob.w = w * packing / out_packing;`
`83`		`- top_blob.cstep = w * packing / out_packing;`
`84`		`- top_blob.elemsize = elemsize / packing * out_packing;`
`85`		`- top_blob.packing = out_packing;`
	`82`	`+ top_blob.w = w * elempack / out_elempack;`
	`83`	`+ top_blob.cstep = w * elempack / out_elempack;`
	`84`	`+ top_blob.elemsize = elemsize / elempack * out_elempack;`
	`85`	`+ top_blob.elempack = out_elempack;`
`86`	`86`	`return 0;`
`87`	`87`	`}`
`88`	`88`
`89`	`89`	`if (dims == 2)`
`90`	`90`	`{`
`91`		`- int outh = h * packing / out_packing;`
`92`		`- size_t out_elemsize = elemsize / packing * out_packing;`
	`91`	`+ int outh = h * elempack / out_elempack;`
	`92`	`+ size_t out_elemsize = elemsize / elempack * out_elempack;`
`93`	`93`
`94`		`- top_blob.create(w, outh, out_elemsize, out_packing, opt.blob_allocator);`
	`94`	`+ top_blob.create(w, outh, out_elemsize, out_elempack, opt.blob_allocator);`
`95`	`95`	`if (top_blob.empty())`
`96`	`96`	`return -100;`
`97`	`97`
`@@ -195,10 +195,10 @@ int Packing_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& op`
`195`	`195`	`if (dims == 3)`
`196`	`196`	`{`
`197`	`197`	`int size = w * h;`
`198`		`- int outc = channels * packing / out_packing;`
`199`		`- size_t out_elemsize = elemsize / packing * out_packing;`
	`198`	`+ int outc = channels * elempack / out_elempack;`
	`199`	`+ size_t out_elemsize = elemsize / elempack * out_elempack;`
`200`	`200`
`201`		`- top_blob.create(w, h, outc, out_elemsize, out_packing, opt.blob_allocator);`
	`201`	`+ top_blob.create(w, h, outc, out_elemsize, out_elempack, opt.blob_allocator);`
`202`	`202`	`if (top_blob.empty())`
`203`	`203`	`return -100;`
`204`	`204`
Original file line number	Diff line number	Diff line change
`@@ -119,10 +119,10 @@ int ReLU_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const`
`119`	`119`	`int h = bottom_top_blob.h;`
`120`	`120`	`int channels = bottom_top_blob.c;`
`121`	`121`	`int size = w * h;`
`122`		`- int packing = bottom_top_blob.packing;`
	`122`	`+ int elempack = bottom_top_blob.elempack;`
`123`	`123`
`124`	`124`	`#if __ARM_NEON`
`125`		`- if (packing == 4)`
	`125`	`+ if (elempack == 4)`
`126`	`126`	`{`
`127`	`127`	`if (slope == 0.f)`
`128`	`128`	`{`