diff --git a/modelconverter/packages/rvc4/exporter.py b/modelconverter/packages/rvc4/exporter.py index 93401b5..ae212b6 100644 --- a/modelconverter/packages/rvc4/exporter.py +++ b/modelconverter/packages/rvc4/exporter.py @@ -40,6 +40,10 @@ def __init__(self, config: SingleStageConfig, output_dir: Path): self.snpe_onnx_to_dlc = rvc4_cfg.snpe_onnx_to_dlc_args self.snpe_dlc_quant = rvc4_cfg.snpe_dlc_quant_args self.snpe_dlc_graph_prepare = rvc4_cfg.snpe_dlc_graph_prepare_args + self.use_per_channel_quantization = ( + rvc4_cfg.use_per_channel_quantization + ) + self.use_per_row_quantization = rvc4_cfg.use_per_row_quantization self.keep_raw_images = rvc4_cfg.keep_raw_images if "--htp_socs" in self.snpe_dlc_graph_prepare: i = self.snpe_dlc_graph_prepare.index("--htp_socs") @@ -104,6 +108,12 @@ def calibrate(self, dlc_path: Path) -> Path: self._add_args(args, ["--input_dlc", dlc_path]) self._add_args(args, ["--output_dlc", quantized_dlc_path]) + if self.use_per_channel_quantization: + args.append("--use_per_channel_quantization") + + if self.use_per_row_quantization: + args.append("--use_per_row_quantization") + start_time = time.time() self._subprocess_run( ["snpe-dlc-quant", *args], meta_name="quantization_cmd" diff --git a/modelconverter/utils/config.py b/modelconverter/utils/config.py index aab6582..d5cdeec 100644 --- a/modelconverter/utils/config.py +++ b/modelconverter/utils/config.py @@ -329,6 +329,8 @@ class RVC4Config(TargetConfig): snpe_dlc_quant_args: List[str] = [] snpe_dlc_graph_prepare_args: List[str] = [] keep_raw_images: bool = False + use_per_channel_quantization: bool = True + use_per_row_quantization: bool = False htp_socs: List[ Literal["sm8350", "sm8450", "sm8550", "sm8650", "qcs6490", "qcs8550"] ] = ["sm8550"] diff --git a/shared_with_container/configs/defaults.yaml b/shared_with_container/configs/defaults.yaml index d4fa9a2..b29c890 100644 --- a/shared_with_container/configs/defaults.yaml +++ b/shared_with_container/configs/defaults.yaml @@ -188,3 
+188,11 @@ stages: # Whether to include the raw images in the intermediate outputs. # Warning: the raw images can get very large. keep_raw_images: False + + # Selects per-axis-element quantization for the weights + # and biases of certain layer types. + # Only Convolution, Deconvolution, and FullyConnected are supported. + use_per_channel_quantization: True + + # Enables row-wise quantization of Matmul and FullyConnected ops. + use_per_row_quantization: False diff --git a/tests/test_utils/test_config.py b/tests/test_utils/test_config.py index 9395d11..8f6f013 100644 --- a/tests/test_utils/test_config.py +++ b/tests/test_utils/test_config.py @@ -51,6 +51,8 @@ "keep_raw_images": False, "htp_socs": ["sm8550"], "disable_calibration": False, + "use_per_channel_quantization": True, + "use_per_row_quantization": False, }, "hailo": { "optimization_level": 2,