luxonis · kozlov721 · Oct 31, 2024 · Oct 30, 2024 · Oct 30, 2024 · Oct 30, 2024
@@ -40,6 +40,10 @@ def __init__(self, config: SingleStageConfig, output_dir: Path):
         self.snpe_onnx_to_dlc = rvc4_cfg.snpe_onnx_to_dlc_args
         self.snpe_dlc_quant = rvc4_cfg.snpe_dlc_quant_args
         self.snpe_dlc_graph_prepare = rvc4_cfg.snpe_dlc_graph_prepare_args
+        self.use_per_channel_quantization = (
+            rvc4_cfg.use_per_channel_quantization
+        )
+        self.use_per_row_quantization = rvc4_cfg.use_per_row_quantization
         self.keep_raw_images = rvc4_cfg.keep_raw_images
         if "--htp_socs" in self.snpe_dlc_graph_prepare:
             i = self.snpe_dlc_graph_prepare.index("--htp_socs")
@@ -104,6 +108,12 @@ def calibrate(self, dlc_path: Path) -> Path:
         self._add_args(args, ["--input_dlc", dlc_path])
         self._add_args(args, ["--output_dlc", quantized_dlc_path])
 
+        if self.use_per_channel_quantization:
+            args.append("--use_per_channel_quantization")
+
+        if self.use_per_row_quantization:
+            args.append("--use_per_row_quantization")
+
         start_time = time.time()
         self._subprocess_run(
             ["snpe-dlc-quant", *args], meta_name="quantization_cmd"

@@ -329,6 +329,8 @@ class RVC4Config(TargetConfig):
     snpe_dlc_quant_args: List[str] = []
     snpe_dlc_graph_prepare_args: List[str] = []
     keep_raw_images: bool = False
+    use_per_channel_quantization: bool = True
+    use_per_row_quantization: bool = False
     htp_socs: List[
         Literal["sm8350", "sm8450", "sm8550", "sm8650", "qcs6490", "qcs8550"]
     ] = ["sm8550"]

@@ -188,3 +188,11 @@ stages:
       # Whether to include the raw images in the intermediate outputs.
       # Warning: the raw images can get very large.
       keep_raw_images: False
+
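+      # Enables per-channel (per-axis-element) quantization of the weights
+      # and biases of supported layer types.
+      # Only Convolution, Deconvolution, and FullyConnected layers are supported.
+      # When True, `--use_per_channel_quantization` is passed to `snpe-dlc-quant`.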
+      use_per_channel_quantization: True
+
+      # Enables row-wise quantization of Matmul and FullyConnected ops.
+      # When True, `--use_per_row_quantization` is passed to `snpe-dlc-quant`.
+      use_per_row_quantization: False
@@ -51,6 +51,8 @@
         "keep_raw_images": False,
         "htp_socs": ["sm8550"],
         "disable_calibration": False,
+        "use_per_channel_quantization": True,
+        "use_per_row_quantization": False,
     },
     "hailo": {
         "optimization_level": 2,