From f28ed386a0c6a309cee586b47791f30a2d767990 Mon Sep 17 00:00:00 2001
From: Joshua Lochner
Date: Fri, 28 Jun 2024 13:13:10 +0000
Subject: [PATCH 1/3] Add ONNX export support for RT-DETR models

---
 docs/source/exporters/onnx/overview.mdx |  1 +
 optimum/exporters/onnx/model_configs.py | 43 +++++++++++++++++++++++++
 optimum/exporters/tasks.py              |  5 +++
 optimum/utils/normalized_config.py      |  1 +
 4 files changed, 50 insertions(+)

diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx
index 747e1396fb..7376061d14 100644
--- a/docs/source/exporters/onnx/overview.mdx
+++ b/docs/source/exporters/onnx/overview.mdx
@@ -85,6 +85,7 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra
 - ResNet
 - Roberta
 - Roformer
+- RT-DETR
 - SAM
 - Segformer
 - SEW
diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
index e23716d4b7..9e46bbbb78 100644
--- a/optimum/exporters/onnx/model_configs.py
+++ b/optimum/exporters/onnx/model_configs.py
@@ -787,6 +787,49 @@ def outputs(self) -> Dict[str, Dict[int, str]]:
         return super().outputs


+class RTDetrDummyInputGenerator(DummyVisionInputGenerator):
+    def __init__(
+        self,
+        task: str,
+        normalized_config: NormalizedVisionConfig,
+        batch_size: int = DEFAULT_DUMMY_SHAPES["batch_size"],
+        num_channels: int = DEFAULT_DUMMY_SHAPES["num_channels"],
+        width: int = DEFAULT_DUMMY_SHAPES["width"],
+        height: int = DEFAULT_DUMMY_SHAPES["height"],
+        **kwargs,
+    ):
+        super().__init__(
+            task=task,
+            normalized_config=normalized_config,
+            batch_size=batch_size,
+            num_channels=num_channels,
+            width=width,
+            height=height,
+            **kwargs,
+        )
+
+        from transformers.onnx.utils import get_preprocessor
+
+        preprocessor = get_preprocessor(normalized_config._name_or_path)
+        if preprocessor is not None and hasattr(preprocessor, "size"):
+            self.height = preprocessor.size.get("height", self.height)
+            self.width = preprocessor.size.get("width", self.width)
+
+    def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"):
+        input_ = super().generate(
+            input_name=input_name, framework=framework, int_dtype=int_dtype, float_dtype=float_dtype
+        )
+        return input_
+
+
+class RTDetrOnnxConfig(ViTOnnxConfig):
+    # OPSET=16 required. Otherwise we get the following error:
+    # torch.onnx.errors.UnsupportedOperatorError: Exporting the operator 'aten::grid_sampler' to ONNX opset version 12 is not supported. Support for this operator was added in version 16, try exporting with this version.
+    DEFAULT_ONNX_OPSET = 16
+    DUMMY_INPUT_GENERATOR_CLASSES = (RTDetrDummyInputGenerator, )
+    ATOL_FOR_VALIDATION = 1e-3
+
+
 class TableTransformerOnnxConfig(DetrOnnxConfig):
     pass

diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py
index 2896842f93..b14297711c 100644
--- a/optimum/exporters/tasks.py
+++ b/optimum/exporters/tasks.py
@@ -951,6 +951,11 @@ class TasksManager:
             onnx="RoFormerOnnxConfig",
             tflite="RoFormerTFLiteConfig",
         ),
+        "rt-detr": supported_tasks_mapping(
+            "feature-extraction",
+            "object-detection",
+            onnx="RTDetrOnnxConfig",
+        ),
         "sam": supported_tasks_mapping(
             "feature-extraction",
             onnx="SamOnnxConfig",
diff --git a/optimum/utils/normalized_config.py b/optimum/utils/normalized_config.py
index 81207b7649..085651dd43 100644
--- a/optimum/utils/normalized_config.py
+++ b/optimum/utils/normalized_config.py
@@ -216,6 +216,7 @@ class NormalizedConfigManager:
             'owlvit',
             'perceiver',
             'roformer',
+            'rt-detr',
             'squeezebert',
             'table-transformer',
         """

From 16d03eb979abe69ba69ad5c78f30b18d39553340 Mon Sep 17 00:00:00 2001
From: Joshua Lochner
Date: Fri, 28 Jun 2024 15:42:31 +0000
Subject: [PATCH 2/3] Keep CHW axes static

---
 optimum/exporters/onnx/model_configs.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
index 9e46bbbb78..4d9290afea 100644
--- a/optimum/exporters/onnx/model_configs.py
+++ b/optimum/exporters/onnx/model_configs.py
@@ -829,6 +829,10 @@ class RTDetrOnnxConfig(ViTOnnxConfig):
     DUMMY_INPUT_GENERATOR_CLASSES = (RTDetrDummyInputGenerator, )
     ATOL_FOR_VALIDATION = 1e-3

+    @property
+    def inputs(self) -> Dict[str, Dict[int, str]]:
+        return {"pixel_values": {0: "batch_size"}}
+

 class TableTransformerOnnxConfig(DetrOnnxConfig):
     pass

From b3f425992ea87aeb05879734f217e6edce8fc569 Mon Sep 17 00:00:00 2001
From: Joshua Lochner
Date: Fri, 28 Jun 2024 15:43:00 +0000
Subject: [PATCH 3/3] Add large test for RT-DETR

---
 tests/exporters/exporters_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py
index 0c52754ff6..7f2662c0c2 100644
--- a/tests/exporters/exporters_utils.py
+++ b/tests/exporters/exporters_utils.py
@@ -260,6 +260,7 @@
     "resnet": "microsoft/resnet-50",
     "roberta": "roberta-base",
     "roformer": "junnyu/roformer_chinese_base",
+    "rt-detr": "PekingU/rtdetr_r50vd",
     "sam": "facebook/sam-vit-base",
     "segformer": "nvidia/segformer-b0-finetuned-ade-512-512",
     "splinter": "hf-internal-testing/tiny-random-SplinterModel",
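
For reference, a minimal usage sketch of the export path these patches enable, assuming
they are applied on top of Optimum: main_export is Optimum's existing Python entry point
for ONNX export, PekingU/rtdetr_r50vd is the checkpoint used by the large export test
above, and the output directory name is an arbitrary placeholder.

    from optimum.exporters.onnx import main_export

    # Export RT-DETR to ONNX for object detection. RTDetrOnnxConfig supplies
    # opset 16 (DEFAULT_ONNX_OPSET), and because its inputs property only marks
    # the batch axis as dynamic, the exported graph keeps C/H/W static.
    main_export(
        "PekingU/rtdetr_r50vd",   # checkpoint from tests/exporters/exporters_utils.py
        output="rtdetr_onnx",     # arbitrary output directory for the exported model
        task="object-detection",  # task registered for "rt-detr" in tasks.py
    )

Since patch 2 pins the channel, height, and width axes, inputs to the exported model must
match the resolution the dummy input generator reads from the preprocessor's size at export
time; only the batch dimension can vary at inference.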