
Commit d2dd1e9

Merge pull request #3906 from alibaba/feature/sync
MNN:Sync: Sync Internal 3.2.4
2 parents: 6d97e40 + 7fde7c7

File tree

165 files changed: +5252, -1652 lines

.gitignore

Lines changed: 1 addition & 5 deletions

```diff
@@ -378,8 +378,4 @@ datasets/*
 source/backend/qnn/3rdParty/include
 project/android/.cxx
 pymnn/android/.cxx/
-pymnn/android/.cxx/abi_configuration_5u53tc49.jsonz
-apps/iOS/MNNLLMChat/MNNLLMiOS/LocalModel/Qwen3-0.6B-MNN
-apps/iOS/MNNLLMChat/Chat/
-apps/iOS/MNNLLMChat/swift-transformers/
-apps/iOS/MNNLLMChat/MNNLLMiOS/LocalModel/Qwen2.5-Omni-3B-MNN
+pymnn/android/.cxx/abi_configuration_5u53tc49.json
```

CMakeLists.txt

Lines changed: 26 additions & 2 deletions

```diff
@@ -71,6 +71,7 @@ option(MNN_ENABLE_COVERAGE "Build with coverage enable" OFF)
 option(MNN_BUILD_PROTOBUFFER "Build with protobuffer in MNN" ON)
 option(MNN_BUILD_OPENCV "Build OpenCV api in MNN." OFF)
 option(MNN_BUILD_LLM "Build llm library based MNN." OFF)
+option(MNN_BUILD_LLM_OMNI "If build llm library, build it with omni (support image / audio)" OFF)
 option(MNN_BUILD_DIFFUSION "Build diffusion demo based MNN." OFF)
 option(MNN_INTERNAL "Build with MNN internal features, such as model authentication, metrics logging" OFF)
 option(MNN_JNI "Build MNN Jni for java to use" OFF)
@@ -104,6 +105,21 @@ IF (NOT DEFINED MNN_USE_SPARSE_COMPUTE)
   set(MNN_USE_SPARSE_COMPUTE ON)
 ENDIF()

+IF (MNN_BUILD_LLM)
+  set(MNN_LOW_MEMORY ON)
+  set(MNN_SUPPORT_TRANSFORMER_FUSE ON)
+  IF (MNN_BUILD_LLM_OMNI)
+    set(MNN_BUILD_OPENCV ON)
+    set(MNN_BUILD_AUDIO ON)
+  ENDIF()
+ENDIF()
+
+IF (MNN_BUILD_DIFFUSION)
+  set(MNN_LOW_MEMORY ON)
+  set(MNN_SUPPORT_TRANSFORMER_FUSE ON)
+  set(MNN_BUILD_OPENCV ON)
+ENDIF()
+
 IF(NOT MNN_BUILD_SHARED_LIBS AND MNN_SEP_BUILD)
   message(WARNING "Close MNN_SEP_BUILD for static library")
   SET(MNN_SEP_BUILD OFF CACHE BOOL "<docstring>" FORCE)
@@ -244,11 +260,14 @@ option(MNN_KLEIDIAI "Enable KLEIDIAI" ON)
 option(MNN_ONEDNN "Enable oneDNN" OFF)
 option(MNN_AVX2 "Open AVX2 Compile for x86 if possible" ON)
 option(MNN_AVX512 "Enable AVX512" OFF)
+option(MNN_USE_RVV "Enable RVV" OFF)
 option(MNN_CUDA "Enable CUDA" OFF)
 option(MNN_TENSORRT "Enable TensorRT" OFF)
 option(MNN_COREML "Enable CoreML" OFF)
 option(MNN_NNAPI "Enable NNAPI" OFF)
-option(MNN_USE_RVV "Enable RVV" OFF)
+option(MNN_QNN "Enable QNN" OFF)
+option(MNN_QNN_ONLINE_FINALIZE "Enable QNN Online Finalize" ON)
+
 option(MNN_GPU_TIME_PROFILE "Enable time profiling for the OpenCL backend and Vulkan backend." OFF)

 option(MNN_CUDA_PROFILE "Enable CUDA profile" OFF)
@@ -257,6 +276,10 @@ if (NOT MNN_CUDA OR NOT CMAKE_SYSTEM_NAME MATCHES "^Linux")
   set(MNN_CUDA_PROFILE OFF)
 endif()

+if (NOT MNN_QNN)
+  set(MNN_QNN_ONLINE_FINALIZE OFF)
+endif()
+
 if (MNN_USE_THREAD_POOL)
   message(STATUS "Use Threadpool, forbid openmp")
   set(MNN_OPENMP OFF)
@@ -297,6 +320,7 @@ message(STATUS "\toneDNN: ${MNN_ONEDNN}")
 message(STATUS "\tTensorRT: ${MNN_TENSORRT}")
 message(STATUS "\tCoreML: ${MNN_COREML}")
 message(STATUS "\tNNAPI: ${MNN_NNAPI}")
+message(STATUS "\tQNN: ${MNN_QNN}")
 message(STATUS "\tCUDA: ${MNN_CUDA}")
 message(STATUS "\tOpenMP: ${MNN_OPENMP}")
 message(STATUS "\tBF16: ${MNN_SUPPORT_BF16}")
@@ -518,6 +542,7 @@ ENDIF()
 IF(MNN_BUILD_LLM)
   file(GLOB MNN_LLM_HDRS ${CMAKE_CURRENT_SOURCE_DIR}/transformers/llm/engine/include/llm/*)
   list(APPEND MNN_EXTRA_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/transformers/llm/engine/include/llm/llm.hpp)
+  list(APPEND MNN_EXTRA_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/transformers/llm/engine/include/llm/reranker.hpp)
 ENDIF()

 IF(MNN_BUILD_DIFFUSION)
@@ -783,7 +808,6 @@ ENDIF()
 ENDIF()

 IF(MNN_BUILD_LLM)
-  # add_definitions(-DMNN_BUILD_LLM)
   include(${CMAKE_CURRENT_LIST_DIR}/transformers/llm/engine/CMakeLists.txt)
   IF(NOT MNN_SEP_BUILD)
     list(APPEND MNN_TARGETS llm)
```
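With the new auto-enable logic above, an LLM build no longer needs to pass MNN_LOW_MEMORY or MNN_SUPPORT_TRANSFORMER_FUSE by hand. A minimal configure sketch based on the flags in this diff (the command lines are illustrative, not part of the commit):

```
# LLM build: MNN_LOW_MEMORY and MNN_SUPPORT_TRANSFORMER_FUSE are now forced ON by CMakeLists.txt
mkdir build && cd build
cmake .. -DMNN_BUILD_LLM=ON
# With omni (image / audio) support, MNN_BUILD_OPENCV and MNN_BUILD_AUDIO are switched on automatically as well
cmake .. -DMNN_BUILD_LLM=ON -DMNN_BUILD_LLM_OMNI=ON
make -j8
```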

docs/compile/cmake.md

Lines changed: 4 additions & 2 deletions

```diff
@@ -57,6 +57,7 @@ MNN uses CMake to build the project; the CMake macro definitions are listed below:
 | MNN_COREML | Whether to build the `CoreML` backend, default `OFF` |
 | MNN_NNAPI | Whether to build the `NNAPI` backend, default `OFF` |
 | MNN_QNN | Whether to build the `QNN` backend, default `OFF` |
+| MNN_QNN_ONLINE_FINALIZE | With `MNN_QNN` enabled, whether to build the QNN backend in online-compilation (finalize) mode, default `ON` |
 | MNN_QNN_CONVERT_MODE | With `MNN_QNN` enabled, whether to build the QNN backend in convert mode, default `OFF` |
 | MNN_NPU | Whether to build HIAI's `NPU` backend, default `OFF` |
 | MNN_USE_SPARSE_COMPUTE | Whether to use sparse compute, default `ON` |
@@ -95,7 +96,8 @@ MNN uses CMake to build the project; the CMake macro definitions are listed below:
 | MNN_CPU_WEIGHT_DEQUANT_GEMM | Whether to compile the CPU weight-dequantization GEMM kernels; if this macro is enabled and MNN::BackendConfig::MemoryMode=Memory_Normal is set for CPU inference, weight-dequantization operators are used to run weight-quantized models, default `OFF` |
 | MNN_SUPPORT_RENDER | Whether to support rendering-related operators, default `OFF` |
 | MNN_SUPPORT_TRANSFORMER_FUSE | Whether to support fused Transformer-related ops, default `OFF` |
-| MNN_BUILD_LLM | Whether to build the MNN-based llm library and demo, default `OFF` |
-| MNN_BUILD_DIFFUSION | Whether to build the MNN-based diffusion demo; requires MNN_BUILD_OPENCV and MNN_IMGCODECS, default `OFF` |
+| MNN_BUILD_LLM | Whether to build the MNN-based llm library and demo, default `OFF`; when enabled, MNN_LOW_MEMORY and MNN_SUPPORT_TRANSFORMER_FUSE are enabled accordingly |
+| MNN_BUILD_LLM_OMNI | When building the MNN-based llm library and demo, whether to support image and audio input, default `OFF`; only takes effect when MNN_BUILD_LLM is enabled, and switches on MNN_BUILD_OPENCV, MNN_IMGCODECS, and MNN_BUILD_AUDIO as well |
+| MNN_BUILD_DIFFUSION | Whether to build the MNN-based diffusion demo, default `OFF`; when enabled, MNN_BUILD_OPENCV, MNN_IMGCODECS, MNN_LOW_MEMORY, and MNN_SUPPORT_TRANSFORMER_FUSE are enabled as well |
 | MNN_KLEIDIAI | Whether to integrate ARM's KleidiAI acceleration library, default `ON` |
 | MNN_USE_RVV | Whether to enable RISC-V Vector extension support, default `OFF` |
```
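The diffusion row works the same way as the LLM rows: its dependencies are now pulled in automatically. A hedged sketch of the configure step the table describes (command line illustrative, not from this commit):

```
# Diffusion demo build: per the table above, MNN_BUILD_OPENCV, MNN_IMGCODECS,
# MNN_LOW_MEMORY and MNN_SUPPORT_TRANSFORMER_FUSE are enabled alongside it
cmake .. -DMNN_BUILD_DIFFUSION=ON
make -j8
```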

docs/compile/engine.md

Lines changed: 2 additions & 5 deletions

```diff
@@ -33,11 +33,8 @@ mkdir build && cd build && cmake .. -DCMAKE_OSX_ARCHITECTURES=arm64 && make -j8

 ## Windows(Visual-Studio)
 - Build environment
-It is recommended to use `Windows Terminal` and build from a tab with the `VS` environment, as follows:
-![image.png](../_static/images/compile/vs_shell.png)
-If not using `Windows Terminal`, refer to the following:
-  - 64-bit build: in Settings, find vcvars64.bat (x64 Native Tools Command Prompt for VS 2017) and click it to open the VS virtual environment for building x64 programs
-  - 32-bit build: in Settings, find vcvarsamd64_x86.bat (x64_x86 Cross Tools Command Prompt for VS 2017) and click it to open the VS virtual environment for cross-compiling x86 programs
+  - 64-bit build: in Settings, find `vcvars64.bat` (x64 Native Tools Command Prompt for VS 2017) and click it to open the VS virtual environment for building x64 programs
+  - 32-bit build: in Settings, find `vcvarsamd64_x86.bat` (`x64_x86` Cross Tools Command Prompt for VS 2017) and click it to open the VS virtual environment for cross-compiling x86 programs
 - Requirements
   - Microsoft Visual Studio >= 2017
   - cmake >= 3.13
```

docs/inference/npu.md

Lines changed: 3 additions & 1 deletion

````diff
@@ -31,7 +31,9 @@ cp -r ${LIB_SRC} ${LIB_DEST}
 ```

 ### QNN backend compilation
-Enable the `MNN_QNN` macro when compiling MNN, i.e. `-DMNN_QNN=ON`
+- Enable the `MNN_QNN` macro when compiling MNN, i.e. `-DMNN_QNN=ON`
+- To run offline-compiled QNN models (offline compilation: use the MNN2QNNModel tool), the `MNN_WITH_PLUGIN` macro must be enabled. To reduce library size, `MNN_QNN_ONLINE_FINALIZE` can optionally be turned off
+

 ### QNN backend execution
 - Set the Backend Type to `MNN_FORWARD_NN`, i.e. 5.
````
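A minimal build sketch for the offline-model case described above, combining the macros this page names (illustrative command line, assuming the flags behave as documented):

```
# QNN backend for offline-compiled models: MNN_WITH_PLUGIN is required,
# and the online-finalize path can be dropped to shrink the library
cmake .. -DMNN_QNN=ON -DMNN_WITH_PLUGIN=ON -DMNN_QNN_ONLINE_FINALIZE=OFF
make -j8
```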

docs/tools/convert.md

Lines changed: 29 additions & 11 deletions

````diff
@@ -335,27 +335,29 @@ cat mobilenet_v1.json
 ## MNN2QNNModel
 ### Purpose
 Uses the QNN toolchain to convert an mnn model into an mnn model-structure file that can run on QNN, plus an offline-serialized QNN model; the offline model can then be executed on QNN.
-- Note: this tool currently only runs on Linux; the QNN SDK must be downloaded in advance, see the QNN environment setup (docs/inference/npu.md)
+- Note: this tool currently only runs on Linux (Ubuntu 22.04 recommended); the QNN SDK must be downloaded in advance, see the QNN environment setup (docs/inference/npu.md)
 ### Parameters
-`Usage: ./MNN2QNNModel src.mnn dst.mnn qnn_sdk_path qnn_model_name qnn_context_config.json`
+`Usage: ./MNN2QNNModel src.mnn dst.mnn qnn_sdk_path qnn_model_name qnn_context_config.json input_type_size input_type_0/input_type_mnn_0 input_type_1/input_type_mnn_1`
 - `src.mnn:str` path of the source mnn model file
 - `dst.mnn:str` path of the target mnn model file
-- `qnn_sdk_path:str` absolute path of the QNN SDK
+- `qnn_sdk_path:str` absolute path of the QNN SDK; version 2.37 or later is recommended
 - `qnn_model_name:str` graph name of the converted QNN model; a folder of the same name must also be created, and the QNN artifacts generated later are placed in that directory
 - `qnn_context_config.json:str` configuration file for generating the QNN context binary (example files: source/backend/qnn/convertor/config_example/context_config.json and source/backend/qnn/convertor/config_example/htp_backend_extensions.json); usually the paths in context_config.json need to be changed, and in htp_backend_extensions.json the graph_names (must match qnn_model_name), soc_id, and dsp_arch (set according to the device model, see the [Qualcomm device architecture table](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/overview.html#supported-snapdragon-devices))
+- (optional) `input_type_size:str` when specific or multiple input shapes are needed, the number of input-shape variants. For example, to generate 2 kinds of input shapes, set it to 2; the following two arguments must then supply the 2 input shapes.
+  - Note: each input shape can be given either as input_type_x (a shape string) or as input_type_mnn (an input mnn file). For single-input single-output models, a shape string is usually more convenient; for multi-input multi-output models, the mnn-file form makes it easier to avoid mistakes.
+- (optional) `input_type_x` details: for a single-input model, e.g. '1x3x512x512'. For a multi-input model, join the individual input shapes with '_', e.g. '1x3x512x512_1x128'. For multi-input multi-output models, using an input.mnn to specify the input shapes is recommended.
 ### Usage example
 ```
 cd mnn_path
 mkdir build
 cd build
 // make sure the Qualcomm SDK headers have been copied to the corresponding path
-cmake .. -DMNN_QNN=ON -DMNN_QNN_CONVERT_MODE=ON -DMNN_SUPPORT_TRANSFORMER_FUSE=ON -DMNN_WITH_PLUGIN=ON
+cmake .. -DMNN_QNN=ON -DMNN_QNN_CONVERT_MODE=ON -DMNN_SUPPORT_TRANSFORMER_FUSE=ON
 make -j16
-
-// create a folder named after qnn_model_name; the artifacts produced later go there
-mkdir qnn_smolvlm_model
-
-./MNN2QNNModel mnnfuse_smolvlm/visual.mnn qnn_smolvlm_model.mnn /mnt/2Tpartition/huaiqian/QNN_DEV/qairt_2_32 qnn_smolvlm_model ../source/backend/qnn/convertor/config_example/context_config.json
+```
+#### Default input shape (no manual shape setting)
+```
+./MNN2QNNModel mnnfuse_smolvlm/visual.mnn qnn_smolvlm_model.mnn /mnt/2Tpartition/tianbu/QNN/qairt/2.37.0.250724 qnn_smolvlm_model ../source/backend/qnn/convertor/config_example/context_config.json

 Can't open file:/sys/devices/system/cpu/cpufreq/schedutil/affected_cpus
 Can't open file:/sys/devices/system/cpu/cpufreq/boost/affected_cpus
@@ -375,9 +377,12 @@ qnn-context-binary-generator pid:1490535
 npu model path:./qnn_smolvlm_model.bin
 [All Pass]: npu model generator success!
 ```
-`[All Pass]: npu model generator success!` means the whole process succeeded. Result:
+`[All Pass]: npu model generator success!` means the whole process succeeded.
+
+#### Inference notes
 - The two required files are generated: dst.mnn and the QNN file qnn_model_name/binary/qnn_model_name.bin.
-- Use these two files in place of the original src.mnn, with the backend set to CPU,
+- Use these two files in place of the original src.mnn
+- Rebuild with the `MNN_WITH_PLUGIN` macro enabled, and set the runtime backend to CPU
 - Correctness check, for example:
 ```
 /*
@@ -388,6 +393,19 @@ npu model path:./qnn_smolvlm_model.bin

 ./ModuleBasic.out qnn_smolvlm_model.mnn dir 0 0 10
 ```
+
+#### Custom input shapes (multiple inference sizes)
+```
+./MNN2QNNModel /home/mnnteam/tianbu/models/mnnfuse_fastvlm2_q8b0sym/visual.mnn ./qnn_models/fastvlm_visual_8_sym_57_v75.mnn /mnt/2Tpartition/tianbu/QNN/qairt/2.37.0.250724/ fastvlm_visual_8_sym_57_v75 ../source/backend/qnn/convertor/config_example/context_config.json 2 1x3x512x512 1x3x1024x1024
+
+...
+[Pass]: qnn-context-binary-generator success!
+[All Pass]: npu model generator success!
+[Output Product]:
+New mnn model path: ./qnn_models/fastvlm_visual_8_sym_57_v75.mnn
+Npu model path: ./qnn_models/fastvlm_visual_8_sym_57_v75_combined.bin
+```
+
 ### Script for generating QNN models for multiple devices
 tools/script/genQNNModelsFromMNN.py provides scripts that generate QNN models for 8Gen1 ~ 8Elite devices
 ```
````
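The parameter notes above also describe an input_type_mnn alternative (supplying an mnn file instead of a shape string), which neither example exercises. A hypothetical sketch, assuming the mnn file path simply takes the place of the shape string in the same positional slot; all file names here are invented for illustration:

```
# Hypothetical: two input-shape variants given as mnn files rather than
# shape strings (input_512.mnn / input_1024.mnn are placeholder names)
./MNN2QNNModel src.mnn dst.mnn /path/to/qnn_sdk my_qnn_model context_config.json 2 input_512.mnn input_1024.mnn
```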
