@@ -134,11 +134,15 @@ endif()
134
134
##############################################
135
135
136
136
include (CheckCXXCompilerFlag)
137
- check_cxx_source_compiles("int main() { int a = 0; asm volatile(\"\" : \" =r\" (a) : \" 0\" (a) : \" memory\" ); return 0; }" NCNN_COMPILER_SUPPORT_GNU_INLINE_ASM)
138
- if (NCNN_COMPILER_SUPPORT_GNU_INLINE_ASM)
139
- option (NCNN_GNU_INLINE_ASM "optimize platform with gnu style inline assembly" ON )
140
- else ()
141
- message (WARNING "The compiler does not support gnu style inline assembly. NCNN_GNU_INLINE_ASM will be OFF." )
137
+
138
+ # GNU-style inline assembly does not actually work with MSVC or with clang-cl (Clang using the MSVC frontend), so skip the check for those compilers
139
+ if (NOT (CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC" )))
140
+ check_cxx_source_compiles("int main() { int a = 0; asm volatile(\"\" : \" =r\" (a) : \" 0\" (a) : \" memory\" ); return 0; }" NCNN_COMPILER_SUPPORT_GNU_INLINE_ASM)
141
+ if (NCNN_COMPILER_SUPPORT_GNU_INLINE_ASM)
142
+ option (NCNN_GNU_INLINE_ASM "optimize platform with gnu style inline assembly" ON )
143
+ else ()
144
+ message (WARNING "The compiler does not support gnu style inline assembly. NCNN_GNU_INLINE_ASM will be OFF." )
145
+ endif ()
142
146
endif ()
143
147
144
148
if ((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm" )
@@ -178,7 +182,7 @@ if((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm")
178
182
endif ()
179
183
180
184
if (CMAKE_SIZEOF_VOID_P EQUAL 8 OR NCNN_TARGET_ILP32)
181
- if (CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC" ) )
185
+ if (CMAKE_CXX_COMPILER_ID MATCHES "MSVC" )
182
186
set (CMAKE_REQUIRED_FLAGS "/arch:armv8.0" )
183
187
check_cxx_source_compiles("#include <arm_neon.h>\n int main() { float32x4_t _a; float16x4_t _s = vcvt_f16_f32(_a); return 0; }" NCNN_COMPILER_SUPPORT_ARM_VFPV4)
184
188
@@ -212,6 +216,41 @@ if((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm")
212
216
set (CMAKE_REQUIRED_FLAGS "/arch:armv8.6" )
213
217
check_cxx_source_compiles("#include <arm_sve.h>\n int main() { svfloat32_t _s, _a, _b; _s = svmmla_f32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM86_SVEF32MM)
214
218
219
+ unset (CMAKE_REQUIRED_FLAGS)
220
+ elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC" )
221
+ set (CMAKE_REQUIRED_FLAGS "/arch:armv8.0" )
222
+ check_cxx_source_compiles("#include <arm_neon.h>\n int main() { float32x4_t _a; float16x4_t _s = vcvt_f16_f32(_a); return 0; }" NCNN_COMPILER_SUPPORT_ARM_VFPV4)
223
+
224
+ set (CMAKE_REQUIRED_FLAGS "/arch:armv8.2 -march=armv8.2-a+fp16" )
225
+ check_cxx_source_compiles("#include <arm_neon.h>\n int main() { float16x8_t _s, _a, _b; _s = vfmaq_f16(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM82_FP16)
226
+
227
+ set (CMAKE_REQUIRED_FLAGS "/arch:armv8.2 -march=armv8.2-a+dotprod" )
228
+ check_cxx_source_compiles("#include <arm_neon.h>\n int main() { int32x4_t _s; int8x16_t _a, _b; _s = vdotq_s32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM82_DOTPROD)
229
+
230
+ set (CMAKE_REQUIRED_FLAGS "/arch:armv8.2 -march=armv8.2-a+fp16fml" )
231
+ check_cxx_source_compiles("#include <arm_neon.h>\n int main() { float32x4_t _s; float16x8_t _a, _b; _s = vfmlalq_low_f16(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM82_FP16FML)
232
+
233
+ set (CMAKE_REQUIRED_FLAGS "/arch:armv8.4 -march=armv8.4-a+bf16" )
234
+ check_cxx_source_compiles("#include <arm_neon.h>\n int main() { float32x4_t _s; bfloat16x8_t _a, _b; _s = vcvt_f32_bf16(vcvt_bf16_f32(vbfmmlaq_f32(_s, _a, _b))); return 0; }" NCNN_COMPILER_SUPPORT_ARM84_BF16)
235
+
236
+ set (CMAKE_REQUIRED_FLAGS "/arch:armv8.4 -march=armv8.4-a+i8mm" )
237
+ check_cxx_source_compiles("#include <arm_neon.h>\n int main() { int32x4_t _s; int8x16_t _a, _b; _s = vmmlaq_s32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM84_I8MM)
238
+
239
+ set (CMAKE_REQUIRED_FLAGS "/arch:armv8.6 -march=armv8.6-a+sve" )
240
+ check_cxx_source_compiles("#include <arm_sve.h>\n int main() { svfloat16_t _s, _a, _b; svbool_t bp; _s = svmla_f16_z(bp, _s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM86_SVE)
241
+
242
+ set (CMAKE_REQUIRED_FLAGS "/arch:armv8.6 -march=armv8.6-a+sve2" )
243
+ check_cxx_source_compiles("#include <arm_sve.h>\n int main() { svint16_t _s; svint8_t _a, _b; _s = svmlslb_s16(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM86_SVE2)
244
+
245
+ set (CMAKE_REQUIRED_FLAGS "/arch:armv8.6 -march=armv8.6-a+sve+bf16" )
246
+ check_cxx_source_compiles("#include <arm_sve.h>\n int main() { svfloat32_t _s; svbfloat16_t _a, _b; _s = svbfmmla_f32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM86_SVEBF16)
247
+
248
+ set (CMAKE_REQUIRED_FLAGS "/arch:armv8.6 -march=armv8.6-a+sve+i8mm" )
249
+ check_cxx_source_compiles("#include <arm_sve.h>\n int main() { svint32_t _s; svint8_t _a, _b; _s = svmmla_s32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM86_SVEI8MM)
250
+
251
+ set (CMAKE_REQUIRED_FLAGS "/arch:armv8.6 -march=armv8.6-a+sve+f32mm" )
252
+ check_cxx_source_compiles("#include <arm_sve.h>\n int main() { svfloat32_t _s, _a, _b; _s = svmmla_f32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM86_SVEF32MM)
253
+
215
254
unset (CMAKE_REQUIRED_FLAGS)
216
255
else ()
217
256
set (CMAKE_REQUIRED_FLAGS "-march=armv8-a" )
@@ -447,7 +486,7 @@ else()
447
486
448
487
option (NCNN_SSE2 "optimize x86 platform with sse2 extension" ON )
449
488
450
- if (CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC" ) )
489
+ if (CMAKE_CXX_COMPILER_ID MATCHES "MSVC" )
451
490
check_cxx_compiler_flag("/arch:AVX" NCNN_COMPILER_SUPPORT_X86_AVX)
452
491
check_cxx_compiler_flag("/arch:AVX" NCNN_COMPILER_SUPPORT_X86_FMA)
453
492
check_cxx_compiler_flag("/arch:AVX" NCNN_COMPILER_SUPPORT_X86_XOP)
@@ -467,6 +506,32 @@ else()
467
506
set (CMAKE_REQUIRED_FLAGS "/arch:AVX512" )
468
507
check_cxx_source_compiles("#include <immintrin.h>\n int main() { __m512h _s, _a, _b; _s = _mm512_fmadd_ph(_s, _a, _b); __m512 _s2; _s2 = _mm512_cvtxph_ps(_mm512_cvtxps_ph(_s2)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_FP16)
469
508
509
+ unset (CMAKE_REQUIRED_FLAGS)
510
+ elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC" )
511
+ check_cxx_compiler_flag("/arch:AVX" NCNN_COMPILER_SUPPORT_X86_AVX)
512
+
513
+ set (CMAKE_REQUIRED_FLAGS "/arch:AVX -mfma -mf16c" )
514
+ check_cxx_source_compiles("#include <immintrin.h>\n int main() { __m256 _s, _a, _b; _s = _mm256_fmadd_ps(_a, _b, _s); return 0; }" NCNN_COMPILER_SUPPORT_X86_FMA)
515
+
516
+ set (CMAKE_REQUIRED_FLAGS "/arch:AVX -mxop" )
517
+ check_cxx_source_compiles("#include <x86intrin.h>\n int main() { __m128 _s, _a, _b; _s = _mm_maddd_epi16(_a, _b, _s); return 0; }" NCNN_COMPILER_SUPPORT_X86_XOP)
518
+
519
+ check_cxx_compiler_flag("/arch:AVX -mf16c" NCNN_COMPILER_SUPPORT_X86_F16C)
520
+ check_cxx_compiler_flag("/arch:AVX2 -mfma -mf16c" NCNN_COMPILER_SUPPORT_X86_AVX2)
521
+ check_cxx_compiler_flag("/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl" NCNN_COMPILER_SUPPORT_X86_AVX512)
522
+
523
+ set (CMAKE_REQUIRED_FLAGS "/arch:AVX2 -mfma -mf16c -mavxvnni" )
524
+ check_cxx_source_compiles("#include <immintrin.h>\n int main() { __m256i _s, _a, _b; _s = _mm256_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX_VNNI)
525
+
526
+ set (CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512vnni" )
527
+ check_cxx_source_compiles("#include <immintrin.h>\n int main() { __m512i _s, _a, _b; _s = _mm512_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI)
528
+
529
+ set (CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512bf16" )
530
+ check_cxx_source_compiles("#include <immintrin.h>\n int main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16)
531
+
532
+ set (CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512fp16" )
533
+ check_cxx_source_compiles("#include <immintrin.h>\n int main() { __m512h _s, _a, _b; _s = _mm512_fmadd_ph(_s, _a, _b); __m512 _s2; _s2 = _mm512_cvtxph_ps(_mm512_cvtxps_ph(_s2)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_FP16)
534
+
470
535
unset (CMAKE_REQUIRED_FLAGS)
471
536
else ()
472
537
check_cxx_compiler_flag("-mavx" NCNN_COMPILER_SUPPORT_X86_AVX)
0 commit comments