diff --git a/microsoft-resnet-50/aitk/resnet_qdq_amd_plus_estimate_npu_latency.json b/microsoft-resnet-50/aitk/resnet_qdq_amd_plus_estimate_npu_latency.json new file mode 100644 index 00000000..32a4a4e2 --- /dev/null +++ b/microsoft-resnet-50/aitk/resnet_qdq_amd_plus_estimate_npu_latency.json @@ -0,0 +1,138 @@ +{ + "input_model": { + "type": "HfModel", + "model_path": "microsoft/resnet-50", + "task": "image-classification", + "io_config": { + "input_names": [ + "pixel_values" + ], + "input_shapes": [ + [ + 1, + 3, + 224, + 224 + ] + ], + "output_names": [ + "logits" + ] + } + }, + "systems": { + "target_system": { + "type": "LocalSystem" + } + }, + "data_configs": [ + { + "name": "quantize_data_config", + "type": "HuggingfaceContainer", + "user_script": "imagenet.py", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "train", + "streaming": true + }, + "pre_process_data_config": { + "type": "dataset_pre_process", + "size": 64, + "cache_key": "imagedata_quantization", + "transpose": true + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + }, + { + "name": "evaluation_data_config", + "type": "HuggingfaceContainer", + "user_script": "imagenet.py", + "load_dataset_config": { + "data_name": "timm/mini-imagenet", + "split": "validation", + "streaming": true + }, + "pre_process_data_config": { + "type": "dataset_pre_process", + "size": 1000, + "cache_key": "imagedata_evaluation", + "transpose": true + }, + "post_process_data_config": { + "type": "dataset_post_process" + } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "accuracy_score", + "priority": 1, + "metric_config": { + "task": "multiclass", + "num_classes": 1001 + } + } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "evaluation_data_config", + "sub_types": [ + { + "name": "avg", + "priority": 2 + } + ] + } + 
] + } + }, + "passes": { + "conversion": { + "device": "cpu", + "type": "OnnxConversion", + "target_opset": 17, + "save_as_external_data": true, + "all_tensors_to_one_file": true, + "dynamic": false, + "use_dynamo_exporter": false + }, + "transpose_input": { + "type": "InputNCHWtoNHWC" + }, + "OnnxQuantization": { + "type": "OnnxQuantization", + "data_config": "quantize_data_config", + "activation_type": "uint8", + "precision": "uint8", + "calibrate_method": "MinMax", + "save_as_external_data": true + }, + "addmetadata": { + "type": "VitisAIAddMetaData", + "config_meta_data_keys": [ + "architectures", + "model_type" + ], + "activation_type": "uint8", + "weight_type": "uint8", + "quant_type": "OnnxStaticQuantization" + }, + "estimate_npu_latency": { "type": "EstimateNPULatency", "target_device": "stx" } + }, + "target": "target_system", + "evaluator": "common_evaluator", + "cache_dir": "cache", + "output_dir": "model/resnet_ptq_qnn", + "evaluate_input_model": false +} diff --git a/timm-mobilenetv3_small_100.lamb_in1k/olive/config_plus_estimate_npu_latency.json b/timm-mobilenetv3_small_100.lamb_in1k/olive/config_plus_estimate_npu_latency.json new file mode 100644 index 00000000..f13a5a3a --- /dev/null +++ b/timm-mobilenetv3_small_100.lamb_in1k/olive/config_plus_estimate_npu_latency.json @@ -0,0 +1,61 @@ +{ + "input_model": { + "type": "PytorchModel", + "model_path": "timm/mobilenetv3_small_100.lamb_in1k", + "model_loader": "load_timm", + "model_script": "user_script.py", + "io_config": { + "input_names": [ "x" ], + "input_shapes": [ [ 1, 3, 224, 224 ] ], + "output_names": [ "output" ] + } + }, + "systems": { + "local_system": { + "type": "LocalSystem" + } + }, + "data_configs": [ + { + "name": "imagenet1k", + "type": "HuggingfaceContainer", + "user_script": "imagenet.py", + "load_dataset_config": { "data_name": "imagenet-1k", "split": "validation", "streaming": true }, + "pre_process_data_config": { "type": "dataset_pre_process", "size": 256, "cache_key": 
"imagenet256" }, + "post_process_data_config": { "type": "imagenet_post_fun" } + } + ], + "evaluators": { + "common_evaluator": { + "metrics": [ + { + "name": "accuracy", + "type": "accuracy", + "backend": "huggingface_metrics", + "data_config": "imagenet1k", + "sub_types": [ + { "name": "accuracy", "priority": 1, "goal": { "type": "max-degradation", "value": 0.05 } } + ] + }, + { + "name": "latency", + "type": "latency", + "data_config": "imagenet1k", + "sub_types": [ + { "name": "avg", "priority": 2, "goal": { "type": "percent-min-improvement", "value": 0.1 } }, + { "name": "max" }, + { "name": "min" } + ] + } + ] + } + }, + "passes": { + "conversion": { "type": "OnnxConversion" }, + "matmul4": { "type": "OnnxBlockWiseRtnQuantization", "accuracy_level": 4 }, + "mnb_to_qdq": { "type": "MatMulNBitsToQDQ" }, + "estimate_npu_latency": { "type": "EstimateNPULatency", "target_device": "stx" } + }, + "evaluator": "common_evaluator", + "output_dir": "models/timm" +}