Skip to content

Commit debc33f

Browse files
authored
arm handle allocation failures (#5490)
1 parent b437963 commit debc33f

30 files changed: +468 -66 lines changed

src/layer/arm/concat_arm.cpp

+8
Original file line number | Diff line number | Diff line change
@@ -159,6 +159,8 @@ int Concat_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>&
159159
if (elempack < out_elempack)
160160
{
161161
convert_packing(top_blob_unpacked, top_blob, out_elempack, opt);
162+
if (top_blob.empty())
163+
return -100;
162164
}
163165
}
164166

@@ -284,6 +286,8 @@ int Concat_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>&
284286
if (elempack < out_elempack)
285287
{
286288
convert_packing(top_blob_unpacked, top_blob, out_elempack, opt);
289+
if (top_blob.empty())
290+
return -100;
287291
}
288292
}
289293

@@ -617,6 +621,8 @@ int Concat_arm::forward_bf16s_fp16s(const std::vector<Mat>& bottom_blobs, std::v
617621
if (elempack < out_elempack)
618622
{
619623
convert_packing(top_blob_unpacked, top_blob, out_elempack, opt);
624+
if (top_blob.empty())
625+
return -100;
620626
}
621627
}
622628

@@ -816,6 +822,8 @@ int Concat_arm::forward_bf16s_fp16s(const std::vector<Mat>& bottom_blobs, std::v
816822
if (elempack < out_elempack)
817823
{
818824
convert_packing(top_blob_unpacked, top_blob, out_elempack, opt);
825+
if (top_blob.empty())
826+
return -100;
819827
}
820828
}
821829

src/layer/arm/convolution_3x3_winograd.h

+33 -3
Original file line number | Diff line number | Diff line change
@@ -5578,7 +5578,7 @@ static inline void conv3x3s1_winograd23_transform_output_tile(const Mat& top_til
55785578
}
55795579
}
55805580

5581-
static void conv3x3s1_winograd23(const Mat& bottom_blob, Mat& top_blob, const Mat& AT, const Mat& bias, int nT, const Option& opt)
5581+
static int conv3x3s1_winograd23(const Mat& bottom_blob, Mat& top_blob, const Mat& AT, const Mat& bias, int nT, const Option& opt)
55825582
{
55835583
int outw = top_blob.w;
55845584
int outh = top_blob.h;
@@ -5605,12 +5605,16 @@ static void conv3x3s1_winograd23(const Mat& bottom_blob, Mat& top_blob, const Ma
56055605
// NCNN_LOGE("TILE M/N/K = %d %d %d -> %d %d %d", M, N, K, TILE_M, TILE_N, TILE_K);
56065606

56075607
Mat BT(TILE_K * TILE_N, B, (K + TILE_K - 1) / TILE_K, (N + TILE_N - 1) / TILE_N, 4u, opt.workspace_allocator);
5608+
if (BT.empty())
5609+
return -100;
56085610

56095611
const int nn_NK = nn_N * nn_K;
56105612

56115613
if (nT > 1 && nn_NK < nT)
56125614
{
56135615
Mat B_tile(TILE_N * B * TILE_K, 4u, opt.workspace_allocator);
5616+
if (B_tile.empty())
5617+
return -100;
56145618

56155619
for (int ppjk = 0; ppjk < nn_NK; ppjk++)
56165620
{
@@ -5634,6 +5638,8 @@ static void conv3x3s1_winograd23(const Mat& bottom_blob, Mat& top_blob, const Ma
56345638
else
56355639
{
56365640
Mat B_tileX(TILE_N * B * TILE_K, 1, nT, 4u, opt.workspace_allocator);
5641+
if (B_tileX.empty())
5642+
return -100;
56375643

56385644
#pragma omp parallel for num_threads(nT)
56395645
for (int ppjk = 0; ppjk < nn_NK; ppjk++)
@@ -5659,6 +5665,8 @@ static void conv3x3s1_winograd23(const Mat& bottom_blob, Mat& top_blob, const Ma
56595665
}
56605666

56615667
Mat top_tileX(TILE_N * B * TILE_M, 1, nT, 4u, opt.workspace_allocator);
5668+
if (top_tileX.empty())
5669+
return -100;
56625670

56635671
#pragma omp parallel for num_threads(nT)
56645672
for (int ppj = 0; ppj < nn_M; ppj++)
@@ -5688,6 +5696,8 @@ static void conv3x3s1_winograd23(const Mat& bottom_blob, Mat& top_blob, const Ma
56885696
conv3x3s1_winograd23_transform_output_tile(top_tile, top_blob, bias, i, max_ii, j, max_jj);
56895697
}
56905698
}
5699+
5700+
return 0;
56915701
}
56925702

56935703
static inline void conv3x3s1_winograd43_transform_kernel_tile(const Mat& kernel, Mat& A, int inch, int i, int max_ii, int k, int max_kk)
@@ -7256,7 +7266,7 @@ static inline void conv3x3s1_winograd43_transform_output_tile(const Mat& top_til
72567266
}
72577267
}
72587268

7259-
static void conv3x3s1_winograd43(const Mat& bottom_blob, Mat& top_blob, const Mat& AT, const Mat& bias, int nT, const Option& opt)
7269+
static int conv3x3s1_winograd43(const Mat& bottom_blob, Mat& top_blob, const Mat& AT, const Mat& bias, int nT, const Option& opt)
72607270
{
72617271
int outw = top_blob.w;
72627272
int outh = top_blob.h;
@@ -7283,12 +7293,16 @@ static void conv3x3s1_winograd43(const Mat& bottom_blob, Mat& top_blob, const Ma
72837293
// NCNN_LOGE("TILE M/N/K = %d %d %d -> %d %d %d", M, N, K, TILE_M, TILE_N, TILE_K);
72847294

72857295
Mat BT(TILE_K * TILE_N, B, (K + TILE_K - 1) / TILE_K, (N + TILE_N - 1) / TILE_N, 4u, opt.workspace_allocator);
7296+
if (BT.empty())
7297+
return -100;
72867298

72877299
const int nn_NK = nn_N * nn_K;
72887300

72897301
if (nT > 1 && nn_NK < nT)
72907302
{
72917303
Mat B_tile(TILE_N * B * TILE_K, 4u, opt.workspace_allocator);
7304+
if (B_tile.empty())
7305+
return -100;
72927306

72937307
for (int ppjk = 0; ppjk < nn_NK; ppjk++)
72947308
{
@@ -7312,6 +7326,8 @@ static void conv3x3s1_winograd43(const Mat& bottom_blob, Mat& top_blob, const Ma
73127326
else
73137327
{
73147328
Mat B_tileX(TILE_N * B * TILE_K, 1, nT, 4u, opt.workspace_allocator);
7329+
if (B_tileX.empty())
7330+
return -100;
73157331

73167332
#pragma omp parallel for num_threads(nT)
73177333
for (int ppjk = 0; ppjk < nn_NK; ppjk++)
@@ -7337,6 +7353,8 @@ static void conv3x3s1_winograd43(const Mat& bottom_blob, Mat& top_blob, const Ma
73377353
}
73387354

73397355
Mat top_tileX(TILE_N * B * TILE_M, 1, nT, 4u, opt.workspace_allocator);
7356+
if (top_tileX.empty())
7357+
return -100;
73407358

73417359
#pragma omp parallel for num_threads(nT)
73427360
for (int ppj = 0; ppj < nn_M; ppj++)
@@ -7366,6 +7384,8 @@ static void conv3x3s1_winograd43(const Mat& bottom_blob, Mat& top_blob, const Ma
73667384
conv3x3s1_winograd43_transform_output_tile(top_tile, top_blob, bias, i, max_ii, j, max_jj);
73677385
}
73687386
}
7387+
7388+
return 0;
73697389
}
73707390

73717391
static inline void conv3x3s1_winograd63_transform_kernel_tile(const Mat& kernel, Mat& A, int inch, int i, int max_ii, int k, int max_kk)
@@ -9292,7 +9312,7 @@ static inline void conv3x3s1_winograd63_transform_output_tile(const Mat& top_til
92929312
}
92939313
}
92949314

9295-
static void conv3x3s1_winograd63(const Mat& bottom_blob, Mat& top_blob, const Mat& AT, const Mat& bias, int nT, const Option& opt)
9315+
static int conv3x3s1_winograd63(const Mat& bottom_blob, Mat& top_blob, const Mat& AT, const Mat& bias, int nT, const Option& opt)
92969316
{
92979317
int outw = top_blob.w;
92989318
int outh = top_blob.h;
@@ -9319,12 +9339,16 @@ static void conv3x3s1_winograd63(const Mat& bottom_blob, Mat& top_blob, const Ma
93199339
// NCNN_LOGE("TILE M/N/K = %d %d %d -> %d %d %d", M, N, K, TILE_M, TILE_N, TILE_K);
93209340

93219341
Mat BT(TILE_K * TILE_N, B, (K + TILE_K - 1) / TILE_K, (N + TILE_N - 1) / TILE_N, 4u, opt.workspace_allocator);
9342+
if (BT.empty())
9343+
return -100;
93229344

93239345
const int nn_NK = nn_N * nn_K;
93249346

93259347
if (nT > 1 && nn_NK < nT)
93269348
{
93279349
Mat B_tile(TILE_N * B * TILE_K, 4u, opt.workspace_allocator);
9350+
if (B_tile.empty())
9351+
return -100;
93289352

93299353
for (int ppjk = 0; ppjk < nn_NK; ppjk++)
93309354
{
@@ -9348,6 +9372,8 @@ static void conv3x3s1_winograd63(const Mat& bottom_blob, Mat& top_blob, const Ma
93489372
else
93499373
{
93509374
Mat B_tileX(TILE_N * B * TILE_K, 1, nT, 4u, opt.workspace_allocator);
9375+
if (B_tileX.empty())
9376+
return -100;
93519377

93529378
#pragma omp parallel for num_threads(nT)
93539379
for (int ppjk = 0; ppjk < nn_NK; ppjk++)
@@ -9373,6 +9399,8 @@ static void conv3x3s1_winograd63(const Mat& bottom_blob, Mat& top_blob, const Ma
93739399
}
93749400

93759401
Mat top_tileX(TILE_N * B * TILE_M, 1, nT, 4u, opt.workspace_allocator);
9402+
if (top_tileX.empty())
9403+
return -100;
93769404

93779405
#pragma omp parallel for num_threads(nT)
93789406
for (int ppj = 0; ppj < nn_M; ppj++)
@@ -9402,4 +9430,6 @@ static void conv3x3s1_winograd63(const Mat& bottom_blob, Mat& top_blob, const Ma
94029430
conv3x3s1_winograd63_transform_output_tile(top_tile, top_blob, bias, i, max_ii, j, max_jj);
94039431
}
94049432
}
9433+
9434+
return 0;
94059435
}

src/layer/arm/convolution_3x3_winograd_bf16s.h

+33 -3
Original file line number | Diff line number | Diff line change
@@ -920,7 +920,7 @@ static inline void conv3x3s1_winograd23_transform_output_tile_bf16s(const Mat& t
920920
}
921921
}
922922

923-
static void conv3x3s1_winograd23_bf16s(const Mat& bottom_blob, Mat& top_blob, const Mat& AT, const Mat& bias, int nT, const Option& opt)
923+
static int conv3x3s1_winograd23_bf16s(const Mat& bottom_blob, Mat& top_blob, const Mat& AT, const Mat& bias, int nT, const Option& opt)
924924
{
925925
int outw = top_blob.w;
926926
int outh = top_blob.h;
@@ -947,12 +947,16 @@ static void conv3x3s1_winograd23_bf16s(const Mat& bottom_blob, Mat& top_blob, co
947947
// NCNN_LOGE("TILE M/N/K = %d %d %d -> %d %d %d", M, N, K, TILE_M, TILE_N, TILE_K);
948948

949949
Mat BT(TILE_K * TILE_N, B, (K + TILE_K - 1) / TILE_K, (N + TILE_N - 1) / TILE_N, 4u, opt.workspace_allocator);
950+
if (BT.empty())
951+
return -100;
950952

951953
const int nn_NK = nn_N * nn_K;
952954

953955
if (nT > 1 && nn_NK < nT)
954956
{
955957
Mat B_tile(TILE_N * B * TILE_K, 4u, opt.workspace_allocator);
958+
if (B_tile.empty())
959+
return -100;
956960

957961
for (int ppjk = 0; ppjk < nn_NK; ppjk++)
958962
{
@@ -976,6 +980,8 @@ static void conv3x3s1_winograd23_bf16s(const Mat& bottom_blob, Mat& top_blob, co
976980
else
977981
{
978982
Mat B_tileX(TILE_N * B * TILE_K, 1, nT, 4u, opt.workspace_allocator);
983+
if (B_tileX.empty())
984+
return -100;
979985

980986
#pragma omp parallel for num_threads(nT)
981987
for (int ppjk = 0; ppjk < nn_NK; ppjk++)
@@ -1001,6 +1007,8 @@ static void conv3x3s1_winograd23_bf16s(const Mat& bottom_blob, Mat& top_blob, co
10011007
}
10021008

10031009
Mat top_tileX(TILE_N * B * TILE_M, 1, nT, 4u, opt.workspace_allocator);
1010+
if (top_tileX.empty())
1011+
return -100;
10041012

10051013
#pragma omp parallel for num_threads(nT)
10061014
for (int ppj = 0; ppj < nn_M; ppj++)
@@ -1030,6 +1038,8 @@ static void conv3x3s1_winograd23_bf16s(const Mat& bottom_blob, Mat& top_blob, co
10301038
conv3x3s1_winograd23_transform_output_tile_bf16s(top_tile, top_blob, bias, i, max_ii, j, max_jj);
10311039
}
10321040
}
1041+
1042+
return 0;
10331043
}
10341044

10351045
static inline void conv3x3s1_winograd43_transform_input_tile_bf16s(const Mat& bottom_blob, Mat& B, int j, int max_jj, int k, int max_kk, int nT)
@@ -2497,7 +2507,7 @@ static inline void conv3x3s1_winograd43_transform_output_tile_bf16s(const Mat& t
24972507
}
24982508
}
24992509

2500-
static void conv3x3s1_winograd43_bf16s(const Mat& bottom_blob, Mat& top_blob, const Mat& AT, const Mat& bias, int nT, const Option& opt)
2510+
static int conv3x3s1_winograd43_bf16s(const Mat& bottom_blob, Mat& top_blob, const Mat& AT, const Mat& bias, int nT, const Option& opt)
25012511
{
25022512
int outw = top_blob.w;
25032513
int outh = top_blob.h;
@@ -2524,12 +2534,16 @@ static void conv3x3s1_winograd43_bf16s(const Mat& bottom_blob, Mat& top_blob, co
25242534
// NCNN_LOGE("TILE M/N/K = %d %d %d -> %d %d %d", M, N, K, TILE_M, TILE_N, TILE_K);
25252535

25262536
Mat BT(TILE_K * TILE_N, B, (K + TILE_K - 1) / TILE_K, (N + TILE_N - 1) / TILE_N, 4u, opt.workspace_allocator);
2537+
if (BT.empty())
2538+
return -100;
25272539

25282540
const int nn_NK = nn_N * nn_K;
25292541

25302542
if (nT > 1 && nn_NK < nT)
25312543
{
25322544
Mat B_tile(TILE_N * B * TILE_K, 4u, opt.workspace_allocator);
2545+
if (B_tile.empty())
2546+
return -100;
25332547

25342548
for (int ppjk = 0; ppjk < nn_NK; ppjk++)
25352549
{
@@ -2553,6 +2567,8 @@ static void conv3x3s1_winograd43_bf16s(const Mat& bottom_blob, Mat& top_blob, co
25532567
else
25542568
{
25552569
Mat B_tileX(TILE_N * B * TILE_K, 1, nT, 4u, opt.workspace_allocator);
2570+
if (B_tileX.empty())
2571+
return -100;
25562572

25572573
#pragma omp parallel for num_threads(nT)
25582574
for (int ppjk = 0; ppjk < nn_NK; ppjk++)
@@ -2578,6 +2594,8 @@ static void conv3x3s1_winograd43_bf16s(const Mat& bottom_blob, Mat& top_blob, co
25782594
}
25792595

25802596
Mat top_tileX(TILE_N * B * TILE_M, 1, nT, 4u, opt.workspace_allocator);
2597+
if (top_tileX.empty())
2598+
return -100;
25812599

25822600
#pragma omp parallel for num_threads(nT)
25832601
for (int ppj = 0; ppj < nn_M; ppj++)
@@ -2607,6 +2625,8 @@ static void conv3x3s1_winograd43_bf16s(const Mat& bottom_blob, Mat& top_blob, co
26072625
conv3x3s1_winograd43_transform_output_tile_bf16s(top_tile, top_blob, bias, i, max_ii, j, max_jj);
26082626
}
26092627
}
2628+
2629+
return 0;
26102630
}
26112631

26122632
static inline void conv3x3s1_winograd63_transform_input_tile_bf16s(const Mat& bottom_blob, Mat& B, int j, int max_jj, int k, int max_kk, int nT)
@@ -4428,7 +4448,7 @@ static inline void conv3x3s1_winograd63_transform_output_tile_bf16s(const Mat& t
44284448
}
44294449
}
44304450

4431-
static void conv3x3s1_winograd63_bf16s(const Mat& bottom_blob, Mat& top_blob, const Mat& AT, const Mat& bias, int nT, const Option& opt)
4451+
static int conv3x3s1_winograd63_bf16s(const Mat& bottom_blob, Mat& top_blob, const Mat& AT, const Mat& bias, int nT, const Option& opt)
44324452
{
44334453
int outw = top_blob.w;
44344454
int outh = top_blob.h;
@@ -4455,12 +4475,16 @@ static void conv3x3s1_winograd63_bf16s(const Mat& bottom_blob, Mat& top_blob, co
44554475
// NCNN_LOGE("TILE M/N/K = %d %d %d -> %d %d %d", M, N, K, TILE_M, TILE_N, TILE_K);
44564476

44574477
Mat BT(TILE_K * TILE_N, B, (K + TILE_K - 1) / TILE_K, (N + TILE_N - 1) / TILE_N, 4u, opt.workspace_allocator);
4478+
if (BT.empty())
4479+
return -100;
44584480

44594481
const int nn_NK = nn_N * nn_K;
44604482

44614483
if (nT > 1 && nn_NK < nT)
44624484
{
44634485
Mat B_tile(TILE_N * B * TILE_K, 4u, opt.workspace_allocator);
4486+
if (B_tile.empty())
4487+
return -100;
44644488

44654489
for (int ppjk = 0; ppjk < nn_NK; ppjk++)
44664490
{
@@ -4484,6 +4508,8 @@ static void conv3x3s1_winograd63_bf16s(const Mat& bottom_blob, Mat& top_blob, co
44844508
else
44854509
{
44864510
Mat B_tileX(TILE_N * B * TILE_K, 1, nT, 4u, opt.workspace_allocator);
4511+
if (B_tileX.empty())
4512+
return -100;
44874513

44884514
#pragma omp parallel for num_threads(nT)
44894515
for (int ppjk = 0; ppjk < nn_NK; ppjk++)
@@ -4509,6 +4535,8 @@ static void conv3x3s1_winograd63_bf16s(const Mat& bottom_blob, Mat& top_blob, co
45094535
}
45104536

45114537
Mat top_tileX(TILE_N * B * TILE_M, 1, nT, 4u, opt.workspace_allocator);
4538+
if (top_tileX.empty())
4539+
return -100;
45124540

45134541
#pragma omp parallel for num_threads(nT)
45144542
for (int ppj = 0; ppj < nn_M; ppj++)
@@ -4538,4 +4566,6 @@ static void conv3x3s1_winograd63_bf16s(const Mat& bottom_blob, Mat& top_blob, co
45384566
conv3x3s1_winograd63_transform_output_tile_bf16s(top_tile, top_blob, bias, i, max_ii, j, max_jj);
45394567
}
45404568
}
4569+
4570+
return 0;
45414571
}

0 commit comments

Comments
 (0)