Skip to content

Commit c4a0074

Browse files
authored
windows clang ci (#5469)
* windows clang ci
* clang msvc use x86intrin.h for xop
* test arm64 compiler features
1 parent 1b7e635 commit c4a0074

12 files changed

+290
-43
lines changed

.github/workflows/windows-clang.yml

+85
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,85 @@
1+
name: windows-clang
2+
on:
3+
push:
4+
branches: [master]
5+
paths:
6+
- '.github/workflows/windows-clang.yml'
7+
- 'CMakeLists.txt'
8+
- 'cmake/**'
9+
- 'src/*'
10+
- 'src/layer/*'
11+
- 'src/layer/arm/**'
12+
- 'src/layer/x86/**'
13+
- 'src/layer/vulkan/**'
14+
- 'tests/**'
15+
pull_request:
16+
branches: [master]
17+
paths:
18+
- '.github/workflows/windows-clang.yml'
19+
- 'CMakeLists.txt'
20+
- 'cmake/**'
21+
- 'src/*'
22+
- 'src/layer/*'
23+
- 'src/layer/arm/**'
24+
- 'src/layer/x86/**'
25+
- 'src/layer/vulkan/**'
26+
- 'tests/**'
27+
concurrency:
28+
group: windows-clang-${{ github.ref }}
29+
cancel-in-progress: true
30+
permissions:
31+
contents: read
32+
33+
jobs:
34+
windows:
35+
name: ClangCL
36+
runs-on: windows-2022
37+
38+
env:
39+
UseMultiToolTask: true
40+
NCNN_CMAKE_OPTIONS: -DNCNN_BUILD_TOOLS=OFF -DNCNN_BUILD_EXAMPLES=OFF
41+
42+
steps:
43+
- uses: actions/checkout@v4
44+
with:
45+
submodules: true
46+
47+
- name: arm64
48+
run: |
49+
mkdir build-arm64; cd build-arm64
50+
cmake -T ClangCL -A arm64 ${{ env.NCNN_CMAKE_OPTIONS }} -DNCNN_VULKAN=OFF ..
51+
cmake --build . --config Release -j 4
52+
53+
- name: arm64-vulkan
54+
run: |
55+
mkdir build-arm64-vulkan; cd build-arm64-vulkan
56+
cmake -T ClangCL -A arm64 ${{ env.NCNN_CMAKE_OPTIONS }} -DNCNN_VULKAN=ON -DNCNN_SHARED_LIB=ON ..
57+
cmake --build . --config Release -j 4
58+
59+
- name: x86
60+
run: |
61+
mkdir build-x86; cd build-x86
62+
cmake -T ClangCL -A Win32 ${{ env.NCNN_CMAKE_OPTIONS }} -DNCNN_BUILD_TESTS=ON -DNCNN_VULKAN=OFF ..
63+
cmake --build . --config Release -j 4
64+
- name: x86-test
65+
run: cd build-x86; ctest -C Release --output-on-failure -j 4
66+
67+
- name: x86-vulkan
68+
run: |
69+
mkdir build-x86-vulkan; cd build-x86-vulkan
70+
cmake -T ClangCL -A Win32 ${{ env.NCNN_CMAKE_OPTIONS }} -DNCNN_VULKAN=ON -DNCNN_SHARED_LIB=ON ..
71+
cmake --build . --config Release -j 4
72+
73+
- name: x64
74+
run: |
75+
mkdir build-x64; cd build-x64
76+
cmake -T ClangCL -A x64 ${{ env.NCNN_CMAKE_OPTIONS }} -DNCNN_BUILD_TESTS=ON -DNCNN_VULKAN=OFF ..
77+
cmake --build . --config Release -j 4
78+
- name: x64-test
79+
run: cd build-x64; ctest -C Release --output-on-failure -j 4
80+
81+
- name: x64-vulkan
82+
run: |
83+
mkdir build-x64-vulkan; cd build-x64-vulkan
84+
cmake -T ClangCL -A x64 ${{ env.NCNN_CMAKE_OPTIONS }} -DNCNN_VULKAN=ON -DNCNN_SHARED_LIB=ON ..
85+
cmake --build . --config Release -j 4

CMakeLists.txt

+72 -7
Original file line number | Diff line number | Diff line change
@@ -134,11 +134,15 @@ endif()
134134
##############################################
135135

136136
include(CheckCXXCompilerFlag)
137-
check_cxx_source_compiles("int main() { int a = 0; asm volatile(\"\" : \"=r\"(a) : \"0\"(a) : \"memory\"); return 0; }" NCNN_COMPILER_SUPPORT_GNU_INLINE_ASM)
138-
if(NCNN_COMPILER_SUPPORT_GNU_INLINE_ASM)
139-
option(NCNN_GNU_INLINE_ASM "optimize platform with gnu style inline assembly" ON)
140-
else()
141-
message(WARNING "The compiler does not support gnu style inline assembly. NCNN_GNU_INLINE_ASM will be OFF.")
137+
138+
# GNU-style inline assembly does not actually work with clang in MSVC mode (clang-cl), so the probe is skipped for MSVC and clang-cl
139+
if(NOT (CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")))
140+
check_cxx_source_compiles("int main() { int a = 0; asm volatile(\"\" : \"=r\"(a) : \"0\"(a) : \"memory\"); return 0; }" NCNN_COMPILER_SUPPORT_GNU_INLINE_ASM)
141+
if(NCNN_COMPILER_SUPPORT_GNU_INLINE_ASM)
142+
option(NCNN_GNU_INLINE_ASM "optimize platform with gnu style inline assembly" ON)
143+
else()
144+
message(WARNING "The compiler does not support gnu style inline assembly. NCNN_GNU_INLINE_ASM will be OFF.")
145+
endif()
142146
endif()
143147

144148
if((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm")
@@ -178,7 +182,7 @@ if((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm")
178182
endif()
179183

180184
if(CMAKE_SIZEOF_VOID_P EQUAL 8 OR NCNN_TARGET_ILP32)
181-
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
185+
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
182186
set(CMAKE_REQUIRED_FLAGS "/arch:armv8.0")
183187
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float32x4_t _a; float16x4_t _s = vcvt_f16_f32(_a); return 0; }" NCNN_COMPILER_SUPPORT_ARM_VFPV4)
184188

@@ -212,6 +216,41 @@ if((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm")
212216
set(CMAKE_REQUIRED_FLAGS "/arch:armv8.6")
213217
check_cxx_source_compiles("#include <arm_sve.h>\nint main() { svfloat32_t _s, _a, _b; _s = svmmla_f32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM86_SVEF32MM)
214218

219+
unset(CMAKE_REQUIRED_FLAGS)
220+
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")
221+
set(CMAKE_REQUIRED_FLAGS "/arch:armv8.0")
222+
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float32x4_t _a; float16x4_t _s = vcvt_f16_f32(_a); return 0; }" NCNN_COMPILER_SUPPORT_ARM_VFPV4)
223+
224+
set(CMAKE_REQUIRED_FLAGS "/arch:armv8.2 -march=armv8.2-a+fp16")
225+
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float16x8_t _s, _a, _b; _s = vfmaq_f16(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM82_FP16)
226+
227+
set(CMAKE_REQUIRED_FLAGS "/arch:armv8.2 -march=armv8.2-a+dotprod")
228+
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int32x4_t _s; int8x16_t _a, _b; _s = vdotq_s32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM82_DOTPROD)
229+
230+
set(CMAKE_REQUIRED_FLAGS "/arch:armv8.2 -march=armv8.2-a+fp16fml")
231+
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float32x4_t _s; float16x8_t _a, _b; _s = vfmlalq_low_f16(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM82_FP16FML)
232+
233+
set(CMAKE_REQUIRED_FLAGS "/arch:armv8.4 -march=armv8.4-a+bf16")
234+
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float32x4_t _s; bfloat16x8_t _a, _b; _s = vcvt_f32_bf16(vcvt_bf16_f32(vbfmmlaq_f32(_s, _a, _b))); return 0; }" NCNN_COMPILER_SUPPORT_ARM84_BF16)
235+
236+
set(CMAKE_REQUIRED_FLAGS "/arch:armv8.4 -march=armv8.4-a+i8mm")
237+
check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int32x4_t _s; int8x16_t _a, _b; _s = vmmlaq_s32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM84_I8MM)
238+
239+
set(CMAKE_REQUIRED_FLAGS "/arch:armv8.6 -march=armv8.6-a+sve")
240+
check_cxx_source_compiles("#include <arm_sve.h>\nint main() { svfloat16_t _s, _a, _b; svbool_t bp; _s = svmla_f16_z(bp, _s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM86_SVE)
241+
242+
set(CMAKE_REQUIRED_FLAGS "/arch:armv8.6 -march=armv8.6-a+sve2")
243+
check_cxx_source_compiles("#include <arm_sve.h>\nint main() { svint16_t _s; svint8_t _a, _b; _s = svmlslb_s16(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM86_SVE2)
244+
245+
set(CMAKE_REQUIRED_FLAGS "/arch:armv8.6 -march=armv8.6-a+sve+bf16")
246+
check_cxx_source_compiles("#include <arm_sve.h>\nint main() { svfloat32_t _s; svbfloat16_t _a, _b; _s = svbfmmla_f32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM86_SVEBF16)
247+
248+
set(CMAKE_REQUIRED_FLAGS "/arch:armv8.6 -march=armv8.6-a+sve+i8mm")
249+
check_cxx_source_compiles("#include <arm_sve.h>\nint main() { svint32_t _s; svint8_t _a, _b; _s = svmmla_s32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM86_SVEI8MM)
250+
251+
set(CMAKE_REQUIRED_FLAGS "/arch:armv8.6 -march=armv8.6-a+sve+f32mm")
252+
check_cxx_source_compiles("#include <arm_sve.h>\nint main() { svfloat32_t _s, _a, _b; _s = svmmla_f32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_ARM86_SVEF32MM)
253+
215254
unset(CMAKE_REQUIRED_FLAGS)
216255
else()
217256
set(CMAKE_REQUIRED_FLAGS "-march=armv8-a")
@@ -447,7 +486,7 @@ else()
447486

448487
option(NCNN_SSE2 "optimize x86 platform with sse2 extension" ON)
449488

450-
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
489+
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
451490
check_cxx_compiler_flag("/arch:AVX" NCNN_COMPILER_SUPPORT_X86_AVX)
452491
check_cxx_compiler_flag("/arch:AVX" NCNN_COMPILER_SUPPORT_X86_FMA)
453492
check_cxx_compiler_flag("/arch:AVX" NCNN_COMPILER_SUPPORT_X86_XOP)
@@ -467,6 +506,32 @@ else()
467506
set(CMAKE_REQUIRED_FLAGS "/arch:AVX512")
468507
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512h _s, _a, _b; _s = _mm512_fmadd_ph(_s, _a, _b); __m512 _s2; _s2 = _mm512_cvtxph_ps(_mm512_cvtxps_ph(_s2)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_FP16)
469508

509+
unset(CMAKE_REQUIRED_FLAGS)
510+
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")
511+
check_cxx_compiler_flag("/arch:AVX" NCNN_COMPILER_SUPPORT_X86_AVX)
512+
513+
set(CMAKE_REQUIRED_FLAGS "/arch:AVX -mfma -mf16c")
514+
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256 _s, _a, _b; _s = _mm256_fmadd_ps(_a, _b, _s); return 0; }" NCNN_COMPILER_SUPPORT_X86_FMA)
515+
516+
set(CMAKE_REQUIRED_FLAGS "/arch:AVX -mxop")
517+
# _mm_maddd_epi16 takes and returns __m128i; probe with the integer vector type so the check does not depend on lax float<->int vector conversions
check_cxx_source_compiles("#include <x86intrin.h>\nint main() { __m128i _s, _a, _b; _s = _mm_maddd_epi16(_a, _b, _s); return 0; }" NCNN_COMPILER_SUPPORT_X86_XOP)
518+
519+
check_cxx_compiler_flag("/arch:AVX -mf16c" NCNN_COMPILER_SUPPORT_X86_F16C)
520+
check_cxx_compiler_flag("/arch:AVX2 -mfma -mf16c" NCNN_COMPILER_SUPPORT_X86_AVX2)
521+
check_cxx_compiler_flag("/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl" NCNN_COMPILER_SUPPORT_X86_AVX512)
522+
523+
set(CMAKE_REQUIRED_FLAGS "/arch:AVX2 -mfma -mf16c -mavxvnni")
524+
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256i _s, _a, _b; _s = _mm256_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX_VNNI)
525+
526+
set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512vnni")
527+
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512i _s, _a, _b; _s = _mm512_dpwssd_epi32(_s, _a, _b); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_VNNI)
528+
529+
set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512bf16")
530+
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m256bh _s; __m512bh _a, _b; _s = _mm512_cvtneps_pbh(_mm512_dpbf16_ps(_mm512_cvtpbh_ps(_s), _a, _b)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_BF16)
531+
532+
set(CMAKE_REQUIRED_FLAGS "/arch:AVX512 -mfma -mf16c -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mavx512fp16")
533+
check_cxx_source_compiles("#include <immintrin.h>\nint main() { __m512h _s, _a, _b; _s = _mm512_fmadd_ph(_s, _a, _b); __m512 _s2; _s2 = _mm512_cvtxph_ps(_mm512_cvtxps_ph(_s2)); return 0; }" NCNN_COMPILER_SUPPORT_X86_AVX512_FP16)
534+
470535
unset(CMAKE_REQUIRED_FLAGS)
471536
else()
472537
check_cxx_compiler_flag("-mavx" NCNN_COMPILER_SUPPORT_X86_AVX)

cmake/ncnn_add_layer.cmake

+63 -2
Original file line number | Diff line number | Diff line change
@@ -134,7 +134,7 @@ macro(ncnn_add_layer class)
134134
endif()
135135

136136
if(NCNN_TARGET_ARCH STREQUAL "x86")
137-
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
137+
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
138138
if(NCNN_RUNTIME_CPU AND NCNN_AVX512)
139139
ncnn_add_arch_opt_layer(${class} avx512 "/arch:AVX512 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__")
140140
endif()
@@ -165,6 +165,37 @@ macro(ncnn_add_layer class)
165165
if(NCNN_F16C)
166166
ncnn_add_arch_opt_source(${class} f16c "/arch:AVX /D__SSSE3__ /D__SSE4_1__ /D__F16C__")
167167
endif()
168+
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")
169+
if(NCNN_RUNTIME_CPU AND NCNN_AVX512)
170+
ncnn_add_arch_opt_layer(${class} avx512 "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__")
171+
endif()
172+
if(NCNN_RUNTIME_CPU AND NCNN_FMA)
173+
ncnn_add_arch_opt_layer(${class} fma "/arch:AVX -mfma -mf16c /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__")
174+
endif()
175+
if(NCNN_RUNTIME_CPU AND NCNN_AVX)
176+
ncnn_add_arch_opt_layer(${class} avx "/arch:AVX /D__SSSE3__ /D__SSE4_1__")
177+
endif()
178+
if(NCNN_AVX512VNNI)
179+
ncnn_add_arch_opt_source(${class} avx512vnni "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512vnni /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512VNNI__")
180+
endif()
181+
if(NCNN_AVX512BF16)
182+
ncnn_add_arch_opt_source(${class} avx512bf16 "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512bf16 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512BF16__")
183+
endif()
184+
if(NCNN_AVX512FP16)
185+
ncnn_add_arch_opt_source(${class} avx512fp16 "/arch:AVX512 -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c -mavx512fp16 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512FP16__")
186+
endif()
187+
if(NCNN_AVXVNNI)
188+
ncnn_add_arch_opt_source(${class} avxvnni "/arch:AVX2 -mfma -mf16c -mavxvnni /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVXVNNI__")
189+
endif()
190+
if(NCNN_AVX2)
191+
ncnn_add_arch_opt_source(${class} avx2 "/arch:AVX2 -mfma -mf16c /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__")
192+
endif()
193+
if(NCNN_XOP)
194+
ncnn_add_arch_opt_source(${class} xop "/arch:AVX -mxop /D__SSSE3__ /D__SSE4_1__ /D__XOP__")
195+
endif()
196+
if(NCNN_F16C)
197+
ncnn_add_arch_opt_source(${class} f16c "/arch:AVX -mf16c /D__SSSE3__ /D__SSE4_1__ /D__F16C__")
198+
endif()
168199
else()
169200
if(NCNN_RUNTIME_CPU AND NCNN_AVX512)
170201
ncnn_add_arch_opt_layer(${class} avx512 "-mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c")
@@ -216,7 +247,7 @@ macro(ncnn_add_layer class)
216247
endif()
217248

218249
if(NCNN_TARGET_ARCH STREQUAL "arm" AND (CMAKE_SIZEOF_VOID_P EQUAL 8 OR NCNN_TARGET_ILP32))
219-
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
250+
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
220251
if(NCNN_VFPV4)
221252
ncnn_add_arch_opt_source(${class} vfpv4 " ")
222253
endif()
@@ -246,6 +277,36 @@ macro(ncnn_add_layer class)
246277
endif()
247278
if(NCNN_ARM86SVEF32MM)
248279
endif()
280+
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC")
281+
if(NCNN_VFPV4)
282+
ncnn_add_arch_opt_source(${class} vfpv4 " ")
283+
endif()
284+
if(NCNN_ARM82)
285+
ncnn_add_arch_opt_source(${class} asimdhp "/arch:armv8.2 -march=armv8.2-a+fp16 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC")
286+
endif()
287+
if(NCNN_ARM82DOT)
288+
ncnn_add_arch_opt_source(${class} asimddp "/arch:armv8.2 -march=armv8.2-a+fp16+dotprod /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD")
289+
endif()
290+
if(NCNN_ARM82FP16FML)
291+
ncnn_add_arch_opt_source(${class} asimdfhm "/arch:armv8.2 -march=armv8.2-a+fp16+fp16fml /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_FP16_FML")
292+
endif()
293+
if(NCNN_ARM84BF16)
294+
ncnn_add_arch_opt_source(${class} bf16 "/arch:armv8.4 -march=armv8.4-a+fp16+dotprod+bf16 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD /D__ARM_FEATURE_FP16_FML /D__ARM_FEATURE_BF16_VECTOR_ARITHMETIC")
295+
endif()
296+
if(NCNN_ARM84I8MM)
297+
ncnn_add_arch_opt_source(${class} i8mm "/arch:armv8.4 -march=armv8.4-a+fp16+dotprod+i8mm /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD /D__ARM_FEATURE_FP16_FML /D__ARM_FEATURE_MATMUL_INT8")
298+
endif()
299+
# TODO: add support for the SVE family
300+
if(NCNN_ARM86SVE)
301+
endif()
302+
if(NCNN_ARM86SVE2)
303+
endif()
304+
if(NCNN_ARM86SVEBF16)
305+
endif()
306+
if(NCNN_ARM86SVEI8MM)
307+
endif()
308+
if(NCNN_ARM86SVEF32MM)
309+
endif()
249310
else()
250311
if(NCNN_VFPV4)
251312
ncnn_add_arch_opt_source(${class} vfpv4 " ")

0 commit comments

Comments
 (0)