diff --git a/.aitk/configs/checks.json b/.aitk/configs/checks.json index 6595ca63..0c9fd7d1 100644 --- a/.aitk/configs/checks.json +++ b/.aitk/configs/checks.json @@ -1,6 +1,6 @@ { - "configCheck": 139, - "copyCheck": 179, + "configCheck": 143, + "copyCheck": 183, "extensionCheck": 1, "gitignoreCheck": 38, "inferenceModelCheck": 25, @@ -8,8 +8,8 @@ "licenseCheck": 37, "modelProjectCheck": 39, "oliveCheck": 47, - "oliveJsonCheck": 139, - "pathCheck": 1153, + "oliveJsonCheck": 143, + "pathCheck": 1173, "requirementsCheck": 37, "templateCheck": 1, "venvRequirementsCheck": 13 diff --git a/.aitk/configs/model_list.json b/.aitk/configs/model_list.json index 0ef10cf3..46f2bef9 100644 --- a/.aitk/configs/model_list.json +++ b/.aitk/configs/model_list.json @@ -9,6 +9,7 @@ "QNN", "QNNGPU", "AMDNPU", + "AMDGPU", "NvidiaTRTRTX", "IntelCPU", "IntelGPU", @@ -54,6 +55,7 @@ "QNN", "QNNGPU", "AMDNPU", + "AMDGPU", "NvidiaTRTRTX", "IntelCPU", "IntelGPU", @@ -163,6 +165,7 @@ "QNN", "QNNGPU", "AMDNPU", + "AMDGPU", "NvidiaTRTRTX", "IntelCPU", "IntelGPU", @@ -230,6 +233,7 @@ "QNN", "QNNGPU", "AMDNPU", + "AMDGPU", "NvidiaTRTRTX", "IntelCPU", "IntelGPU", diff --git a/.aitk/docs/guide/ModelList.md b/.aitk/docs/guide/ModelList.md index 56da43a4..b68da570 100644 --- a/.aitk/docs/guide/ModelList.md +++ b/.aitk/docs/guide/ModelList.md @@ -5,23 +5,23 @@ | Model Name | Supported Runtimes | |------------|--------------------| | [Deepseek R1 Distill Llama 8B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B) | [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Llama-8B/aitk/deepseek_ov_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Llama-8B/aitk/deepseek_ov_npu_config.json) | -| [Deepseek R1 Distill Qwen 1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) | [Qualcomm NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json), [Qualcomm GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_gpu_config.json), [AMD NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json), [Intel CPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_gpu_config.json), [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_gpu_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json), [DirectML](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json) | +| [Deepseek R1 Distill Qwen 1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) | [Qualcomm NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_config.json), [Qualcomm GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_qnn_gpu_config.json), [AMD NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_vitis_ai_config.json), [AMD GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_migraphx_config.json), [NVIDIA TensorRT for RTX](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_trtrtx_config.json), [Intel CPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_gpu_config.json), [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_gpu_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_ov_config.json), [DirectML](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_dml_config.json) | | [Deepseek R1 Distill Qwen 14B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B) | [NVIDIA TensorRT for RTX](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-14B/aitk/deepseek_trtrtx.json), [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-14B/aitk/deepseek_ov_config.json) | | [Deepseek R1 Distill Qwen 7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) | [NVIDIA TensorRT for RTX](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_trtrtx.json), [Intel GPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_ov_config.json), [Intel NPU](../../../deepseek-ai-DeepSeek-R1-Distill-Qwen-7B/aitk/deepseek_ov_npu_config.json) | | [Llama 3.1 8B Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) | [Qualcomm NPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_qnn_config.json), [AMD NPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_trtrtx_config.json), [Intel CPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_ov_gpu_config.json), [Intel GPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_ov_gpu_config.json), [Intel NPU](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_ov_config.json), [DirectML](../../../meta-llama-Llama-3.1-8B-Instruct/aitk/llama3_1_dml_config.json) | -| [Llama 3.2 1B Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct) | [Qualcomm NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_config.json), [Qualcomm GPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_gpu_config.json), [AMD NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json), [Intel CPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_gpu_config.json), [Intel GPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_gpu_config.json), [Intel NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json), [DirectML](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_dml_config.json) | +| [Llama 3.2 1B Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct) | [Qualcomm NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_config.json), [Qualcomm GPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_qnn_gpu_config.json), [AMD NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_vitis_ai_config.json), [AMD GPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_migraphx_config.json), [NVIDIA TensorRT for RTX](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_trtrtx_config.json), [Intel CPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_gpu_config.json), [Intel GPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_gpu_config.json), [Intel NPU](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_ov_config.json), [DirectML](../../../meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_dml_config.json) | | [Mistral 7B Instruct V0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) | [NVIDIA TensorRT for RTX](../../../mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_trtrtx.json), [Intel GPU](../../../mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_gpu_context_ov_dy.json), [Intel NPU](../../../mistralai-Mistral-7B-Instruct-v0.2/aitk/Mistral_7B_Instruct_v0.2_npu_context_ov_dy.json) | | [Mistral 7B Instruct V0.3](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3) | [Intel GPU](../../../mistralai-Mistral-7B-Instruct-v0.3/aitk/mistral-7b-instruct-v0.3-ov.json) | | [Phi 3 Mini 128K Instruct](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct) | [NVIDIA TensorRT for RTX](../../../microsoft-Phi-3-mini-128k-instruct/aitk/phi3_trtrtx.json), [Intel GPU](../../../microsoft-Phi-3-mini-128k-instruct/aitk/phi3_ov_config.json) | | [Phi 3 Mini 4K Instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) | [NVIDIA TensorRT for RTX](../../../microsoft-Phi-3-mini-4k-instruct/aitk/phi3_trtrtx.json), [Intel GPU](../../../microsoft-Phi-3-mini-4k-instruct/aitk/phi3_ov_config.json), [Intel NPU](../../../microsoft-Phi-3-mini-4k-instruct/aitk/phi3_ov_npu_config.json) | -| [Phi 3.5 Mini Instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) | [Qualcomm NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_config.json), [Qualcomm GPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_gpu_config.json), [AMD NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json), [Intel CPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_gpu_config.json), [Intel GPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_gpu_config.json), [Intel NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_config.json), [DirectML](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_dml_config.json) | +| [Phi 3.5 Mini Instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) | [Qualcomm NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_config.json), [Qualcomm GPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_qnn_gpu_config.json), [AMD NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_vitis_ai_config.json), [AMD GPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_migraphx_config.json), [NVIDIA TensorRT for RTX](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_trtrtx_config.json), [Intel CPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_gpu_config.json), [Intel GPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_gpu_config.json), [Intel NPU](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_ov_config.json), [DirectML](../../../microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_dml_config.json) | | [Phi 4](https://huggingface.co/microsoft/Phi-4) | [NVIDIA TensorRT for RTX](../../../microsoft-Phi-4/aitk/phi4_trtrtx.json), [Intel GPU](../../../microsoft-Phi-4/aitk/phi4_ov_config.json) | | [Phi 4 Mini Instruct](https://huggingface.co/microsoft/Phi-4-mini-instruct) | [Intel GPU](../../../microsoft-Phi-4-mini-instruct/aitk/phi4_ov_config.json), [Intel NPU](../../../microsoft-Phi-4-mini-instruct/aitk/phi4_ov_npu_config.json) | | [Phi 4 Mini Reasoning](https://huggingface.co/microsoft/Phi-4-mini-reasoning) | [Intel GPU](../../../microsoft-Phi-4-mini-reasoning/aitk/phi4_ov_gpu_config.json), [Intel NPU](../../../microsoft-Phi-4-mini-reasoning/aitk/phi4_ov_config.json) | | [Phi 4 Reasoning](https://huggingface.co/microsoft/Phi-4-reasoning) | [Intel NPU](../../../microsoft-Phi-4-reasoning/aitk/phi4_ov_config.json) | | [Phi 4 Reasoning Plus](https://huggingface.co/microsoft/Phi-4-reasoning-plus) | [Intel NPU](../../../microsoft-Phi-4-reasoning-plus/aitk/phi4_ov_config.json) | | [Qwen2.5 0.5B Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) | [NVIDIA TensorRT for RTX](../../../Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_trtrtx.json), [Intel GPU](../../../Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_ov_config.json), [Intel NPU](../../../Qwen-Qwen2.5-0.5B-Instruct/aitk/qwen2_5_ov_npu_config.json) | -| [Qwen2.5 1.5B Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) | [Qualcomm NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_config.json), [Qualcomm GPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_gpu_config.json), [AMD NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json), [NVIDIA TensorRT for RTX](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json), [Intel CPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_gpu_config.json), [Intel GPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_gpu_config.json), [Intel NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_config.json), [DirectML](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_dml_config.json) | +| [Qwen2.5 1.5B Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) | [Qualcomm NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_config.json), [Qualcomm GPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_qnn_gpu_config.json), [AMD NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_vitis_ai_config.json), [AMD GPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_migraphx_config.json), [NVIDIA TensorRT for RTX](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_trtrtx_config.json), [Intel CPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_gpu_config.json), [Intel GPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_gpu_config.json), [Intel NPU](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_ov_config.json), [DirectML](../../../Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_dml_config.json) | | [Qwen2.5 14B Instruct](https://huggingface.co/Qwen/Qwen2.5-14B-Instruct) | [NVIDIA TensorRT for RTX](../../../Qwen-Qwen2.5-14B-Instruct/aitk/qwen2_5_trtrtx.json), [Intel GPU](../../../Qwen-Qwen2.5-14B-Instruct/aitk/qwen2_5_ov_config.json) | | [Qwen2.5 3B Instruct](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct) | [Intel GPU](../../../Qwen-Qwen2.5-3B-Instruct/aitk/qwen2_5_ov_config.json) | | [Qwen2.5 7B Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) | [NVIDIA TensorRT for RTX](../../../Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_trtrtx.json), [Intel GPU](../../../Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_ov_config.json), [Intel NPU](../../../Qwen-Qwen2.5-7B-Instruct/aitk/qwen2_5_ov_npu_config.json) | diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config index fbcb485b..2f4efbf2 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/_copy.json.config @@ -83,6 +83,16 @@ { "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py", "dst": "winml.py" + }, + { + "src": "qwen2_5_dml_config.json", + "dst": "qwen2_5_migraphx_config.json", + "replacements": [ + { + "find": "DmlExecutionProvider", + "replace": "MIGraphXExecutionProvider" + } + ] } ] } diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml index ab959901..cc0be08d 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/info.yml @@ -42,6 +42,9 @@ recipes: isGPURequired: true runtimeOverwrite: executeEp: NvTensorRTRTXExecutionProvider + - file: "qwen2_5_migraphx_config.json" + device: gpu + ep: MIGraphXExecutionProvider aitk: modelInfo: id: "huggingface/Qwen/Qwen2.5-1.5B-Instruct" diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config index 8d84fc40..5b980977 100644 --- a/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/model_project.config @@ -27,6 +27,10 @@ { "file": "qwen2_5_qnn_gpu_config.json", "templateName": "qwen2_5_qnn_gpu_config" + }, + { + "file": "qwen2_5_migraphx_config.json", + "templateName": "qwen2_5_migraphx_config" } ], "modelInfo": { diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_migraphx_config.json b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_migraphx_config.json new file mode 100644 index 00000000..66d47504 --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_migraphx_config.json @@ -0,0 +1,43 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "Qwen/Qwen2.5-1.5B-Instruct" + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device":"cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device":"gpu", + "execution_providers": [ + "MIGraphXExecutionProvider" + ] + } + ] + } + }, + "passes": { + "mb": { + "type": "ModelBuilder", + "precision": "int4" + } + }, + "host": "host_system", + "target": "target_system", + "log_severity_level": 1, + "output_dir": "model/qwen2_5", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} diff --git a/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_migraphx_config.json.config b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_migraphx_config.json.config new file mode 100644 index 00000000..ed42a827 --- /dev/null +++ b/Qwen-Qwen2.5-1.5B-Instruct/aitk/qwen2_5_migraphx_config.json.config @@ -0,0 +1,90 @@ +{ + "name": "Convert to AMD GPU", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "mb" + }, + "needHFLogin": true, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "AMD GPU" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "MIGraphXExecutionProvider" + ], + "readOnly": false + }, + "optimizationPaths": [ + { + "path": "passes.mb.precision" + } + ], + "optimizationDefault": "int4", + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "autoGenerated": true, + "name": "Optimization", + "phase": "Quantization", + "parameters": [ + { + "autoGenerated": true, + "name": "Precision", + "description": "Precision of model", + "type": "enum", + "displayNames": [ + "Int4", + "Bf16", + "Fp16", + "Fp32" + ], + "displayType": "RadioGroup", + "path": "passes.mb.precision", + "values": [ + "int4", + "bf16", + "fp16", + "fp32" + ], + "template": { + "path": "passes.mb.precision", + "template": "ModelBuilderPrecision" + } + } + ], + "disableToggleGeneration": true, + "toggle": { + "autoGenerated": true, + "name": "Optimize model", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/_copy.json.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/_copy.json.config index 6fd21fb1..1d89cb82 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/_copy.json.config +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/_copy.json.config @@ -3,6 +3,16 @@ { "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py", "dst": "winml.py" + }, + { + "src": "deepseek_dml_config.json", + "dst": "deepseek_migraphx_config.json", + "replacements": [ + { + "find": "DmlExecutionProvider", + "replace": "MIGraphXExecutionProvider" + } + ] } ] } diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_migraphx_config.json b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_migraphx_config.json new file mode 100644 index 00000000..cb9618d9 --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_migraphx_config.json @@ -0,0 +1,43 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device":"cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device":"gpu", + "execution_providers": [ + "MIGraphXExecutionProvider" + ] + } + ] + } + }, + "passes": { + "mb": { + "type": "ModelBuilder", + "precision": "int4" + } + }, + "host": "host_system", + "target": "target_system", + "log_severity_level": 1, + "output_dir": "model/deepseek", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_migraphx_config.json.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_migraphx_config.json.config new file mode 100644 index 00000000..ed42a827 --- /dev/null +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/deepseek_migraphx_config.json.config @@ -0,0 +1,90 @@ +{ + "name": "Convert to AMD GPU", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "mb" + }, + "needHFLogin": true, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "AMD GPU" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "MIGraphXExecutionProvider" + ], + "readOnly": false + }, + "optimizationPaths": [ + { + "path": "passes.mb.precision" + } + ], + "optimizationDefault": "int4", + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "autoGenerated": true, + "name": "Optimization", + "phase": "Quantization", + "parameters": [ + { + "autoGenerated": true, + "name": "Precision", + "description": "Precision of model", + "type": "enum", + "displayNames": [ + "Int4", + "Bf16", + "Fp16", + "Fp32" + ], + "displayType": "RadioGroup", + "path": "passes.mb.precision", + "values": [ + "int4", + "bf16", + "fp16", + "fp32" + ], + "template": { + "path": "passes.mb.precision", + "template": "ModelBuilderPrecision" + } + } + ], + "disableToggleGeneration": true, + "toggle": { + "autoGenerated": true, + "name": "Optimize model", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml index c0ee7cc8..6d5e38c6 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/info.yml @@ -42,6 +42,9 @@ recipes: isGPURequired: true runtimeOverwrite: executeEp: NvTensorRTRTXExecutionProvider + - file: "deepseek_migraphx_config.json" + device: gpu + ep: MIGraphXExecutionProvider aitk: modelInfo: id: "huggingface/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" diff --git a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config index 5969076f..78840efb 100644 --- a/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config +++ b/deepseek-ai-DeepSeek-R1-Distill-Qwen-1.5B/aitk/model_project.config @@ -27,6 +27,10 @@ { "file": "deepseek_qnn_gpu_config.json", "templateName": "deepseek_qnn_gpu_config" + }, + { + "file": "deepseek_migraphx_config.json", + "templateName": "deepseek_migraphx_config" } ], "modelInfo": { diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config index 5713565e..3c047a2e 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/_copy.json.config @@ -83,6 +83,16 @@ { "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py", "dst": "winml.py" + }, + { + "src": "llama3_2_dml_config.json", + "dst": "llama3_2_migraphx_config.json", + "replacements": [ + { + "find": "DmlExecutionProvider", + "replace": "MIGraphXExecutionProvider" + } + ] } ] } diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml index e3fa19ce..62b901a1 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/info.yml @@ -42,6 +42,9 @@ recipes: isGPURequired: true runtimeOverwrite: executeEp: NvTensorRTRTXExecutionProvider + - file: "llama3_2_migraphx_config.json" + device: gpu + ep: MIGraphXExecutionProvider aitk: modelInfo: id: "huggingface/meta-llama/Llama-3.2-1B-Instruct" diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_migraphx_config.json b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_migraphx_config.json new file mode 100644 index 00000000..7fe316b7 --- /dev/null +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_migraphx_config.json @@ -0,0 +1,43 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "meta-llama/Llama-3.2-1B-Instruct" + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device":"cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device":"gpu", + "execution_providers": [ + "MIGraphXExecutionProvider" + ] + } + ] + } + }, + "passes": { + "mb": { + "type": "ModelBuilder", + "precision": "int4" + } + }, + "host": "host_system", + "target": "target_system", + "log_severity_level": 1, + "output_dir": "model/llama3_2", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_migraphx_config.json.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_migraphx_config.json.config new file mode 100644 index 00000000..ed42a827 --- /dev/null +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/llama3_2_migraphx_config.json.config @@ -0,0 +1,90 @@ +{ + "name": "Convert to AMD GPU", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "mb" + }, + "needHFLogin": true, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "AMD GPU" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "MIGraphXExecutionProvider" + ], + "readOnly": false + }, + "optimizationPaths": [ + { + "path": "passes.mb.precision" + } + ], + "optimizationDefault": "int4", + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "autoGenerated": true, + "name": "Optimization", + "phase": "Quantization", + "parameters": [ + { + "autoGenerated": true, + "name": "Precision", + "description": "Precision of model", + "type": "enum", + "displayNames": [ + "Int4", + "Bf16", + "Fp16", + "Fp32" + ], + "displayType": "RadioGroup", + "path": "passes.mb.precision", + "values": [ + "int4", + "bf16", + "fp16", + "fp32" + ], + "template": { + "path": "passes.mb.precision", + "template": "ModelBuilderPrecision" + } + } + ], + "disableToggleGeneration": true, + "toggle": { + "autoGenerated": true, + "name": "Optimize model", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +} diff --git a/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config b/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config index b63789af..76410976 100644 --- a/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config +++ b/meta-llama-Llama-3.2-1B-Instruct/aitk/model_project.config @@ -27,6 +27,10 @@ { "file": "llama3_2_qnn_gpu_config.json", "templateName": "llama3_2_qnn_gpu_config" + }, + { + "file": "llama3_2_migraphx_config.json", + "templateName": "llama3_2_migraphx_config" } ], "modelInfo": { diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config index 5d9b59ff..779661a6 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config +++ b/microsoft-Phi-3.5-mini-instruct/aitk/_copy.json.config @@ -65,6 +65,16 @@ { "src": "../../intel-bert-base-uncased-mrpc/aitk/winml.py", "dst": "winml.py" + }, + { + "src": "phi3_5_dml_config.json", + "dst": "phi3_5_migraphx_config.json", + "replacements": [ + { + "find": "DmlExecutionProvider", + "replace": "MIGraphXExecutionProvider" + } + ] } ] } diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml index 0b85f0ac..7b4878b2 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/info.yml +++ b/microsoft-Phi-3.5-mini-instruct/aitk/info.yml @@ -42,6 +42,9 @@ recipes: isGPURequired: true runtimeOverwrite: executeEp: NvTensorRTRTXExecutionProvider + - file: "phi3_5_migraphx_config.json" + device: gpu + ep: MIGraphXExecutionProvider aitk: modelInfo: id: "huggingface/microsoft/Phi-3.5-mini-instruct" diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config b/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config index a1ac1eb2..12a6c0b5 100644 --- a/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config +++ b/microsoft-Phi-3.5-mini-instruct/aitk/model_project.config @@ -27,6 +27,10 @@ { "file": "phi3_5_qnn_gpu_config.json", "templateName": "phi3_5_qnn_gpu_config" + }, + { + "file": "phi3_5_migraphx_config.json", + "templateName": "phi3_5_migraphx_config" } ], "modelInfo": { diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_migraphx_config.json b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_migraphx_config.json new file mode 100644 index 00000000..64a60da6 --- /dev/null +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_migraphx_config.json @@ -0,0 +1,43 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "microsoft/Phi-3.5-mini-instruct" + }, + "systems": { + "host_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device":"cpu", + "execution_providers": [ + "CPUExecutionProvider" + ] + } + ] + }, + "target_system": { + "type": "LocalSystem", + "accelerators": [ + { + "device":"gpu", + "execution_providers": [ + "MIGraphXExecutionProvider" + ] + } + ] + } + }, + "passes": { + "mb": { + "type": "ModelBuilder", + "precision": "int4" + } + }, + "host": "host_system", + "target": "target_system", + "log_severity_level": 1, + "output_dir": "model/phi3_5", + "cache_dir": "cache", + "no_artifacts": true, + "evaluate_input_model": false +} diff --git a/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_migraphx_config.json.config b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_migraphx_config.json.config new file mode 100644 index 00000000..ed42a827 --- /dev/null +++ b/microsoft-Phi-3.5-mini-instruct/aitk/phi3_5_migraphx_config.json.config @@ -0,0 +1,90 @@ +{ + "name": "Convert to AMD GPU", + "isLLM": true, + "debugInfo": { + "autoGenerated": true, + "useModelBuilder": "mb" + }, + "needHFLogin": true, + "runtime": { + "autoGenerated": true, + "name": "Evaluate on", + "type": "enum", + "displayNames": [ + "AMD GPU" + ], + "path": "systems.target_system.accelerators.0.execution_providers.0", + "values": [ + "MIGraphXExecutionProvider" + ], + "readOnly": false + }, + "optimizationPaths": [ + { + "path": "passes.mb.precision" + } + ], + "optimizationDefault": "int4", + "sections": [ + { + "autoGenerated": true, + "name": "Convert", + "phase": "Conversion", + "parameters": [], + "toggle": { + "autoGenerated": true, + "name": "Convert to ONNX format", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + }, + { + "autoGenerated": true, + "name": "Optimization", + "phase": "Quantization", + "parameters": [ + { + "autoGenerated": true, + "name": "Precision", + "description": "Precision of model", + "type": "enum", + "displayNames": [ + "Int4", + "Bf16", + "Fp16", + "Fp32" + ], + "displayType": "RadioGroup", + "path": "passes.mb.precision", + "values": [ + "int4", + "bf16", + "fp16", + "fp32" + ], + "template": { + "path": "passes.mb.precision", + "template": "ModelBuilderPrecision" + } + } + ], + "disableToggleGeneration": true, + "toggle": { + "autoGenerated": true, + "name": "Optimize model", + "type": "bool", + "path": "passes.mb", + "actions": [ + [], + [] + ], + "readOnly": true + } + } + ] +}