From f28ed386a0c6a309cee586b47791f30a2d767990 Mon Sep 17 00:00:00 2001
From: Joshua Lochner
Date: Fri, 28 Jun 2024 13:13:10 +0000
Subject: [PATCH 1/3] Add ONNX export support for RT-DETR models

---
 docs/source/exporters/onnx/overview.mdx |  1 +
 optimum/exporters/onnx/model_configs.py | 43 +++++++++++++++++++++++++
 optimum/exporters/tasks.py              |  5 +++
 optimum/utils/normalized_config.py      |  1 +
 4 files changed, 50 insertions(+)

diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx
index 747e1396fb..7376061d14 100644
--- a/docs/source/exporters/onnx/overview.mdx
+++ b/docs/source/exporters/onnx/overview.mdx
@@ -85,6 +85,7 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra
 - ResNet
 - Roberta
 - Roformer
+- RT-DETR
 - SAM
 - Segformer
 - SEW
diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
index e23716d4b7..9e46bbbb78 100644
--- a/optimum/exporters/onnx/model_configs.py
+++ b/optimum/exporters/onnx/model_configs.py
@@ -787,6 +787,49 @@ def outputs(self) -> Dict[str, Dict[int, str]]:
         return super().outputs


+class RTDetrDummyInputGenerator(DummyVisionInputGenerator):
+    def __init__(
+        self,
+        task: str,
+        normalized_config: NormalizedVisionConfig,
+        batch_size: int = DEFAULT_DUMMY_SHAPES["batch_size"],
+        num_channels: int = DEFAULT_DUMMY_SHAPES["num_channels"],
+        width: int = DEFAULT_DUMMY_SHAPES["width"],
+        height: int = DEFAULT_DUMMY_SHAPES["height"],
+        **kwargs,
+    ):
+        super().__init__(
+            task=task,
+            normalized_config=normalized_config,
+            batch_size=batch_size,
+            num_channels=num_channels,
+            width=width,
+            height=height,
+            **kwargs,
+        )
+
+        from transformers.onnx.utils import get_preprocessor
+
+        preprocessor = get_preprocessor(normalized_config._name_or_path)
+        if preprocessor is not None and hasattr(preprocessor, "size"):
+            self.height = preprocessor.size.get("height", self.height)
+            self.width = preprocessor.size.get("width", self.width)
+
+    def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"):
+        input_ = super().generate(
+            input_name=input_name, framework=framework, int_dtype=int_dtype, float_dtype=float_dtype
+        )
+        return input_
+
+
+class RTDetrOnnxConfig(ViTOnnxConfig):
+    # OPSET=16 required. Otherwise we get the following error:
+    # torch.onnx.errors.UnsupportedOperatorError: Exporting the operator 'aten::grid_sampler' to ONNX opset version 12 is not supported. Support for this operator was added in version 16, try exporting with this version.
+    DEFAULT_ONNX_OPSET = 16
+    DUMMY_INPUT_GENERATOR_CLASSES = (RTDetrDummyInputGenerator, )
+    ATOL_FOR_VALIDATION = 1e-3
+
+
 class TableTransformerOnnxConfig(DetrOnnxConfig):
     pass

diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py
index 2896842f93..b14297711c 100644
--- a/optimum/exporters/tasks.py
+++ b/optimum/exporters/tasks.py
@@ -951,6 +951,11 @@ class TasksManager:
             onnx="RoFormerOnnxConfig",
             tflite="RoFormerTFLiteConfig",
         ),
+        "rt-detr": supported_tasks_mapping(
+            "feature-extraction",
+            "object-detection",
+            onnx="RTDetrOnnxConfig",
+        ),
         "sam": supported_tasks_mapping(
             "feature-extraction",
             onnx="SamOnnxConfig",
diff --git a/optimum/utils/normalized_config.py b/optimum/utils/normalized_config.py
index 81207b7649..085651dd43 100644
--- a/optimum/utils/normalized_config.py
+++ b/optimum/utils/normalized_config.py
@@ -216,6 +216,7 @@ class NormalizedConfigManager:
             'owlvit',
             'perceiver',
             'roformer',
+            'rt-detr',
             'squeezebert',
             'table-transformer',
         """

From 16d03eb979abe69ba69ad5c78f30b18d39553340 Mon Sep 17 00:00:00 2001
From: Joshua Lochner
Date: Fri, 28 Jun 2024 15:42:31 +0000
Subject: [PATCH 2/3] Keep CHW axes static

---
 optimum/exporters/onnx/model_configs.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
index 9e46bbbb78..4d9290afea 100644
--- a/optimum/exporters/onnx/model_configs.py
+++ b/optimum/exporters/onnx/model_configs.py
@@ -829,6 +829,10 @@ class RTDetrOnnxConfig(ViTOnnxConfig):
     DUMMY_INPUT_GENERATOR_CLASSES = (RTDetrDummyInputGenerator, )
     ATOL_FOR_VALIDATION = 1e-3

+    @property
+    def inputs(self) -> Dict[str, Dict[int, str]]:
+        return {"pixel_values": {0: "batch_size"}}
+

 class TableTransformerOnnxConfig(DetrOnnxConfig):
     pass

From b3f425992ea87aeb05879734f217e6edce8fc569 Mon Sep 17 00:00:00 2001
From: Joshua Lochner
Date: Fri, 28 Jun 2024 15:43:00 +0000
Subject: [PATCH 3/3] Add large test for RT-DETR

---
 tests/exporters/exporters_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py
index 0c52754ff6..7f2662c0c2 100644
--- a/tests/exporters/exporters_utils.py
+++ b/tests/exporters/exporters_utils.py
@@ -260,6 +260,7 @@
     "resnet": "microsoft/resnet-50",
     "roberta": "roberta-base",
     "roformer": "junnyu/roformer_chinese_base",
+    "rt-detr": "PekingU/rtdetr_r50vd",
     "sam": "facebook/sam-vit-base",
     "segformer": "nvidia/segformer-b0-finetuned-ade-512-512",
     "splinter": "hf-internal-testing/tiny-random-SplinterModel",
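
For reference, a minimal usage sketch of the export path these patches enable, assuming
they are applied on top of Optimum: main_export is Optimum's existing Python entry point
for ONNX export, PekingU/rtdetr_r50vd is the checkpoint used by the large export test
above, and the output directory name is an arbitrary placeholder.

    from optimum.exporters.onnx import main_export

    # Export RT-DETR to ONNX for object detection. RTDetrOnnxConfig supplies
    # opset 16 (DEFAULT_ONNX_OPSET), and because its inputs property only marks
    # the batch axis as dynamic, the exported graph keeps C/H/W static.
    main_export(
        "PekingU/rtdetr_r50vd",   # checkpoint from tests/exporters/exporters_utils.py
        output="rtdetr_onnx",     # arbitrary output directory for the exported model
        task="object-detection",  # task registered for "rt-detr" in tasks.py
    )

Since patch 2 pins the channel, height, and width axes, inputs to the exported model must
match the resolution the dummy input generator reads from the preprocessor's size at export
time; only the batch dimension can vary at inference.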