Skip to content

Commit 08b7d99

Browse files
authored
rnn/lstm/gru dynamic quantization (#5435)
1 parent be15dbe commit 08b7d99

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+11129
-1726
lines changed

.ci/test-coverage.yml

+1
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ jobs:
187187
- { SSE2: 'ON', AVX: 'OFF', XOP: 'OFF', F16C: 'OFF', FMA: 'OFF', AVX2: 'OFF', AVX512: 'OFF', AVX512VNNI: 'OFF', AVXVNNI: 'OFF', AVX512BF16: 'OFF', AVX512FP16: 'OFF'}
188188
- { SSE2: 'ON', AVX: 'ON', XOP: 'OFF', F16C: 'OFF', FMA: 'OFF', AVX2: 'OFF', AVX512: 'OFF', AVX512VNNI: 'OFF', AVXVNNI: 'OFF', AVX512BF16: 'OFF', AVX512FP16: 'OFF'}
189189
- { SSE2: 'ON', AVX: 'ON', XOP: 'OFF', F16C: 'ON', FMA: 'ON', AVX2: 'ON', AVX512: 'OFF', AVX512VNNI: 'OFF', AVXVNNI: 'OFF', AVX512BF16: 'OFF', AVX512FP16: 'OFF'}
190+
- { SSE2: 'ON', AVX: 'ON', XOP: 'OFF', F16C: 'ON', FMA: 'ON', AVX2: 'ON', AVX512: 'ON', AVX512VNNI: 'OFF', AVXVNNI: 'OFF', AVX512BF16: 'OFF', AVX512FP16: 'OFF'}
190191
- { SSE2: 'ON', AVX: 'ON', XOP: 'OFF', F16C: 'ON', FMA: 'ON', AVX2: 'ON', AVX512: 'ON', AVX512VNNI: 'ON', AVXVNNI: 'OFF', AVX512BF16: 'OFF', AVX512FP16: 'OFF'}
191192

192193
runs-on:

cmake/ncnn_add_layer.cmake

+10-10
Original file line numberDiff line numberDiff line change
@@ -136,34 +136,34 @@ macro(ncnn_add_layer class)
136136
if(NCNN_TARGET_ARCH STREQUAL "x86")
137137
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
138138
if(NCNN_RUNTIME_CPU AND NCNN_AVX512)
139-
ncnn_add_arch_opt_layer(${class} avx512 "/arch:AVX512 /D__SSE4_1__ /D__FMA__ /D__F16C__")
139+
ncnn_add_arch_opt_layer(${class} avx512 "/arch:AVX512 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__")
140140
endif()
141141
if(NCNN_RUNTIME_CPU AND NCNN_FMA)
142-
ncnn_add_arch_opt_layer(${class} fma "/arch:AVX /D__SSE4_1__ /D__FMA__ /D__F16C__")
142+
ncnn_add_arch_opt_layer(${class} fma "/arch:AVX /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__")
143143
endif()
144144
if(NCNN_RUNTIME_CPU AND NCNN_AVX)
145-
ncnn_add_arch_opt_layer(${class} avx "/arch:AVX /D__SSE4_1__")
145+
ncnn_add_arch_opt_layer(${class} avx "/arch:AVX /D__SSSE3__ /D__SSE4_1__")
146146
endif()
147147
if(NCNN_AVX512VNNI)
148-
ncnn_add_arch_opt_source(${class} avx512vnni "/arch:AVX512 /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512VNNI__")
148+
ncnn_add_arch_opt_source(${class} avx512vnni "/arch:AVX512 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512VNNI__")
149149
endif()
150150
if(NCNN_AVX512BF16)
151-
ncnn_add_arch_opt_source(${class} avx512bf16 "/arch:AVX512 /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512BF16__")
151+
ncnn_add_arch_opt_source(${class} avx512bf16 "/arch:AVX512 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512BF16__")
152152
endif()
153153
if(NCNN_AVX512FP16)
154-
ncnn_add_arch_opt_source(${class} avx512fp16 "/arch:AVX512 /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512FP16__")
154+
ncnn_add_arch_opt_source(${class} avx512fp16 "/arch:AVX512 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVX512FP16__")
155155
endif()
156156
if(NCNN_AVXVNNI)
157-
ncnn_add_arch_opt_source(${class} avxvnni "/arch:AVX2 /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVXVNNI__")
157+
ncnn_add_arch_opt_source(${class} avxvnni "/arch:AVX2 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__ /D__AVXVNNI__")
158158
endif()
159159
if(NCNN_AVX2)
160-
ncnn_add_arch_opt_source(${class} avx2 "/arch:AVX2 /D__SSE4_1__ /D__FMA__ /D__F16C__")
160+
ncnn_add_arch_opt_source(${class} avx2 "/arch:AVX2 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__")
161161
endif()
162162
if(NCNN_XOP)
163-
ncnn_add_arch_opt_source(${class} xop "/arch:AVX /D__SSE4_1__ /D__XOP__")
163+
ncnn_add_arch_opt_source(${class} xop "/arch:AVX /D__SSSE3__ /D__SSE4_1__ /D__XOP__")
164164
endif()
165165
if(NCNN_F16C)
166-
ncnn_add_arch_opt_source(${class} f16c "/arch:AVX /D__SSE4_1__ /D__F16C__")
166+
ncnn_add_arch_opt_source(${class} f16c "/arch:AVX /D__SSSE3__ /D__SSE4_1__ /D__F16C__")
167167
endif()
168168
else()
169169
if(NCNN_RUNTIME_CPU AND NCNN_AVX512)

docs/how-to-use-and-FAQ/quantized-int8-inference.md

+6
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,12 @@ If your model has multiple input nodes, you can use multiple list files and othe
4848
./ncnn2int8 mobilenet-opt.param mobilenet-opt.bin mobilenet-int8.param mobilenet-int8.bin mobilenet.table
4949
```
5050

51+
If you don’t need static quantization, you can use dynamic quantization instead, which ncnn supports for RNN/LSTM/GRU layers. In this case, omit the table file from the ncnn2int8 command.
52+
53+
```shell
54+
./ncnn2int8 rnn-model.param rnn-model.bin rnn-model-int8.param rnn-model-int8.bin
55+
```
56+
5157
## use ncnn int8 inference
5258

5359
the ncnn library uses int8 inference automatically; no changes to your code are needed

src/CMakeLists.txt

+4-4
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,7 @@ if(NCNN_TARGET_ARCH STREQUAL "x86")
404404

405405
if(NOT NCNN_RUNTIME_CPU AND NCNN_AVX512)
406406
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
407-
target_compile_options(ncnn PRIVATE /arch:AVX512 /D__SSE4_1__ /D__FMA__ /D__F16C__)
407+
target_compile_options(ncnn PRIVATE /arch:AVX512 /D__SSSE3__ /D__SSE4_1__ /D__FMA__ /D__F16C__)
408408
if(NCNN_AVX512VNNI)
409409
target_compile_options(ncnn PRIVATE /D__AVX512VNNI__)
410410
endif()
@@ -423,9 +423,9 @@ if(NCNN_TARGET_ARCH STREQUAL "x86")
423423
elseif(NOT NCNN_RUNTIME_CPU AND NCNN_FMA)
424424
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
425425
if(NCNN_AVX2)
426-
target_compile_options(ncnn PRIVATE /arch:AVX2 /D__SSE4_1__ /D__FMA__)
426+
target_compile_options(ncnn PRIVATE /arch:AVX2 /D__SSSE3__ /D__SSE4_1__ /D__FMA__)
427427
else()
428-
target_compile_options(ncnn PRIVATE /arch:AVX /D__SSE4_1__ /D__FMA__)
428+
target_compile_options(ncnn PRIVATE /arch:AVX /D__SSSE3__ /D__SSE4_1__ /D__FMA__)
429429
endif()
430430
if(NCNN_AVXVNNI)
431431
target_compile_options(ncnn PRIVATE /D__AVXVNNI__)
@@ -452,7 +452,7 @@ if(NCNN_TARGET_ARCH STREQUAL "x86")
452452
endif()
453453
elseif(NOT NCNN_RUNTIME_CPU AND NCNN_AVX)
454454
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
455-
target_compile_options(ncnn PRIVATE /arch:AVX /D__SSE4_1__)
455+
target_compile_options(ncnn PRIVATE /arch:AVX /D__SSSE3__ /D__SSE4_1__)
456456
if(NCNN_XOP)
457457
target_compile_options(ncnn PRIVATE /D__XOP__)
458458
endif()

0 commit comments

Comments
 (0)