From 3b805a1bb559a542710281209902bbc61bde4f38 Mon Sep 17 00:00:00 2001 From: dme-compunet <117437050+dme-compunet@users.noreply.github.com> Date: Sun, 15 Sep 2024 17:23:02 +0300 Subject: [PATCH] Update code to v5.0.0 --- README.md | 74 +++--- Source/YoloV8.Demo/Program.cs | 20 +- Source/YoloV8.Demo/YoloV8.Demo.csproj | 2 +- Source/YoloV8.Tests/NonMaxSuppressionTests.cs | 30 +-- Source/YoloV8.Tests/Predictors.cs | 18 +- Source/YoloV8.Tests/Usings.cs | 6 +- .../{YoloV8Tests.cs => YoloPredictorTests.cs} | 30 +-- Source/YoloV8.Tests/YoloV8.Tests.csproj | 2 +- Source/YoloV8/AssemblyAttributes.cs | 3 - Source/YoloV8/Base/PostprocessContext.cs | 5 - Source/YoloV8/Base/YoloV8Configuration.cs | 22 -- Source/YoloV8/Base/YoloV8Extensions.cs | 197 ---------------- Source/YoloV8/Base/YoloV8Predictor.cs | 107 --------- Source/YoloV8/Builder/IYoloV8Builder.cs | 29 --- Source/YoloV8/Builder/YoloV8Builder.cs | 79 ------- .../Services/IMemoryAllocatorService.cs | 6 + .../Services/INonMaxSupressionService.cs | 6 + .../Contracts/Services/IPreprocessService.cs | 6 + .../Services/ISessionRunnerService.cs | 6 + .../Contracts/Services/Parsing/IParser{T}.cs | 6 + .../Services/Parsing/IRawBoundingBoxParser.cs | 10 + Source/YoloV8/Data/BoundingBox.cs | 15 -- Source/YoloV8/Data/Classification.cs | 7 + Source/YoloV8/Data/ClassificationResult.cs | 13 -- Source/YoloV8/Data/Detection.cs | 8 + Source/YoloV8/Data/DetectionResult.cs | 8 - Source/YoloV8/Data/IYoloPrediction{TSelf}.cs | 6 + Source/YoloV8/Data/ObbBoundingBox.cs | 6 - Source/YoloV8/Data/ObbDetection.cs | 8 + Source/YoloV8/Data/ObbDetectionResult.cs | 8 - Source/YoloV8/Data/Pose.cs | 22 ++ Source/YoloV8/Data/PoseBoundingBox.cs | 11 - Source/YoloV8/Data/PoseResult.cs | 8 - Source/YoloV8/Data/Segmentation.cs | 8 + Source/YoloV8/Data/SegmentationBoundingBox.cs | 6 - .../Data/{Mask.cs => SegmentationMask.cs} | 0 Source/YoloV8/Data/SegmentationResult.cs | 8 - Source/YoloV8/{Timing => Data}/SpeedResult.cs | 2 +- ...{ClassProbability.cs => 
YoloPrediction.cs} | 4 +- .../YoloV8/Data/YoloPredictionExtensions.cs | 6 + .../Data/{YoloV8Result.cs => YoloResult.cs} | 4 +- Source/YoloV8/Data/YoloResult{T}.cs | 24 ++ ...ensions.cs => DetectionBoxesExtensions.cs} | 6 +- .../YoloV8/Extensions/ImageSharpExtensions.cs | 35 +-- .../OrientedBoundingBoxExtensions.cs | 28 +-- Source/YoloV8/Global/Assembly.cs | 1 + Source/YoloV8/{ => Global}/Usings.cs | 10 +- Source/YoloV8/Memory/DenseTensorOwner.cs | 24 ++ .../Memory/MemoryAllocatorExtensions.cs | 11 + Source/YoloV8/Memory/TensorShape.cs | 29 +++ Source/YoloV8/Memory/YoloRawOutput.cs | 42 ++++ Source/YoloV8/Metadata/SessionTensorInfo.cs | 24 ++ Source/YoloV8/Metadata/YoloArchitecture.cs | 7 + Source/YoloV8/Metadata/YoloMetadata.cs | 105 +++++++++ .../Metadata/{YoloV8Class.cs => YoloName.cs} | 2 +- Source/YoloV8/Metadata/YoloPoseMetadata.cs | 25 ++ .../Metadata/{YoloV8Task.cs => YoloTask.cs} | 2 +- Source/YoloV8/Metadata/YoloV8Metadata.cs | 120 ---------- Source/YoloV8/Metadata/YoloV8PoseMetadata.cs | 25 -- .../YoloV8/Parsers/DetectionOutputParser.cs | 25 -- Source/YoloV8/Parsers/IndexedBoundingBox.cs | 16 -- .../Parsers/IndexedBoundingBoxParser.cs | 136 ----------- .../Parsers/ObbDetectionOutputParser.cs | 26 --- .../YoloV8/Parsers/ObbIndexedBoundingBox.cs | 18 -- .../Parsers/ObbIndexedBoundingBoxParser.cs | 143 ------------ Source/YoloV8/Parsers/PoseOutputParser.cs | 78 ------- .../Parsers/SegmentationOutputParser.cs | 156 ------------- Source/YoloV8/Parsing/IRawBoundingBox.cs | 18 ++ Source/YoloV8/Parsing/RawBoundingBox.cs | 92 ++++++++ Source/YoloV8/Parsing/RawObbBoundingBox.cs | 102 ++++++++ Source/YoloV8/Parsing/RawParsingContext.cs | 14 ++ Source/YoloV8/Plotting/ColorPalette.cs | 6 +- .../ClassificationPlottingOptions.cs | 0 .../{ => Options}/DetectionPlottingOptions.cs | 9 +- .../Plotting/{ => Options}/PlottingOptions.cs | 2 +- .../{ => Options}/PosePlottingOptions.cs | 0 .../SegmentationPlottingOptions.cs | 0 .../Plotting/PlottingAsyncExtensions.cs | 29 +++ 
.../PlottingAsyncOperationExtensions.cs | 30 --- Source/YoloV8/Plotting/PlottingExtensions.cs | 218 +++++++++--------- .../PredictorPlottingExtensions.cs} | 89 ++++--- .../PredictorTimer.cs} | 16 +- Source/YoloV8/Predictor/ServiceResolver.cs | 122 ++++++++++ Source/YoloV8/Predictor/YoloConfiguration.cs | 67 ++++++ Source/YoloV8/Predictor/YoloPredictor.cs | 120 ++++++++++ .../Predictor/YoloPredictorAsyncExtensions.cs | 129 +++++++++++ .../Predictor/YoloPredictorExtensions.cs | 124 ++++++++++ .../YoloV8/Predictor/YoloPredictorOptions.cs | 34 +++ Source/YoloV8/Selectors/BinarySelector.cs | 33 --- Source/YoloV8/Selectors/ImageSelector.cs | 24 -- .../YoloV8/Selectors/ImageSelector{TPixel}.cs | 41 ---- .../YoloV8/Services/MemoryAllocatorService.cs | 51 ++++ .../Services/NonMaxSuppressionService.cs | 53 +++++ .../Services/Parsers/ClassificationParser.cs | 27 +++ .../Services/Parsers/DetectionParser.cs | 25 ++ .../Services/Parsers/ObbDetectionParser.cs | 26 +++ Source/YoloV8/Services/Parsers/PoseParser.cs | 83 +++++++ .../Services/Parsers/RawBoundingBoxParser.cs | 195 ++++++++++++++++ .../Services/Parsers/SegmentationParser.cs | 170 ++++++++++++++ Source/YoloV8/Services/PreprocessService.cs | 75 ++++++ .../YoloV8/Services/SerssionRunnerService.cs | 139 +++++++++++ .../YoloV8/Utilities/ImageContoursDetector.cs | 52 +++-- .../Utilities/NonMaxSuppressionHelper.cs | 96 -------- .../Utilities/ObbNonMaxSuppressionHelper.cs | 111 --------- Source/YoloV8/Utilities/PreprocessHelper.cs | 67 ------ Source/YoloV8/Utilities/Vector.cs | 12 + Source/YoloV8/YoloV8.csproj | 5 +- 107 files changed, 2440 insertions(+), 2006 deletions(-) rename Source/YoloV8.Tests/{YoloV8Tests.cs => YoloPredictorTests.cs} (67%) delete mode 100644 Source/YoloV8/AssemblyAttributes.cs delete mode 100644 Source/YoloV8/Base/PostprocessContext.cs delete mode 100644 Source/YoloV8/Base/YoloV8Configuration.cs delete mode 100644 Source/YoloV8/Base/YoloV8Extensions.cs delete mode 100644 
Source/YoloV8/Base/YoloV8Predictor.cs delete mode 100644 Source/YoloV8/Builder/IYoloV8Builder.cs delete mode 100644 Source/YoloV8/Builder/YoloV8Builder.cs create mode 100644 Source/YoloV8/Contracts/Services/IMemoryAllocatorService.cs create mode 100644 Source/YoloV8/Contracts/Services/INonMaxSupressionService.cs create mode 100644 Source/YoloV8/Contracts/Services/IPreprocessService.cs create mode 100644 Source/YoloV8/Contracts/Services/ISessionRunnerService.cs create mode 100644 Source/YoloV8/Contracts/Services/Parsing/IParser{T}.cs create mode 100644 Source/YoloV8/Contracts/Services/Parsing/IRawBoundingBoxParser.cs delete mode 100644 Source/YoloV8/Data/BoundingBox.cs create mode 100644 Source/YoloV8/Data/Classification.cs delete mode 100644 Source/YoloV8/Data/ClassificationResult.cs create mode 100644 Source/YoloV8/Data/Detection.cs delete mode 100644 Source/YoloV8/Data/DetectionResult.cs create mode 100644 Source/YoloV8/Data/IYoloPrediction{TSelf}.cs delete mode 100644 Source/YoloV8/Data/ObbBoundingBox.cs create mode 100644 Source/YoloV8/Data/ObbDetection.cs delete mode 100644 Source/YoloV8/Data/ObbDetectionResult.cs create mode 100644 Source/YoloV8/Data/Pose.cs delete mode 100644 Source/YoloV8/Data/PoseBoundingBox.cs delete mode 100644 Source/YoloV8/Data/PoseResult.cs create mode 100644 Source/YoloV8/Data/Segmentation.cs delete mode 100644 Source/YoloV8/Data/SegmentationBoundingBox.cs rename Source/YoloV8/Data/{Mask.cs => SegmentationMask.cs} (100%) delete mode 100644 Source/YoloV8/Data/SegmentationResult.cs rename Source/YoloV8/{Timing => Data}/SpeedResult.cs (93%) rename Source/YoloV8/Data/{ClassProbability.cs => YoloPrediction.cs} (68%) create mode 100644 Source/YoloV8/Data/YoloPredictionExtensions.cs rename Source/YoloV8/Data/{YoloV8Result.cs => YoloResult.cs} (53%) create mode 100644 Source/YoloV8/Data/YoloResult{T}.cs rename Source/YoloV8/Extensions/{BoundingBoxesExtensions.cs => DetectionBoxesExtensions.cs} (61%) create mode 100644 
Source/YoloV8/Global/Assembly.cs rename Source/YoloV8/{ => Global}/Usings.cs (67%) create mode 100644 Source/YoloV8/Memory/DenseTensorOwner.cs create mode 100644 Source/YoloV8/Memory/MemoryAllocatorExtensions.cs create mode 100644 Source/YoloV8/Memory/TensorShape.cs create mode 100644 Source/YoloV8/Memory/YoloRawOutput.cs create mode 100644 Source/YoloV8/Metadata/SessionTensorInfo.cs create mode 100644 Source/YoloV8/Metadata/YoloArchitecture.cs create mode 100644 Source/YoloV8/Metadata/YoloMetadata.cs rename Source/YoloV8/Metadata/{YoloV8Class.cs => YoloName.cs} (81%) create mode 100644 Source/YoloV8/Metadata/YoloPoseMetadata.cs rename Source/YoloV8/Metadata/{YoloV8Task.cs => YoloTask.cs} (81%) delete mode 100644 Source/YoloV8/Metadata/YoloV8Metadata.cs delete mode 100644 Source/YoloV8/Metadata/YoloV8PoseMetadata.cs delete mode 100644 Source/YoloV8/Parsers/DetectionOutputParser.cs delete mode 100644 Source/YoloV8/Parsers/IndexedBoundingBox.cs delete mode 100644 Source/YoloV8/Parsers/IndexedBoundingBoxParser.cs delete mode 100644 Source/YoloV8/Parsers/ObbDetectionOutputParser.cs delete mode 100644 Source/YoloV8/Parsers/ObbIndexedBoundingBox.cs delete mode 100644 Source/YoloV8/Parsers/ObbIndexedBoundingBoxParser.cs delete mode 100644 Source/YoloV8/Parsers/PoseOutputParser.cs delete mode 100644 Source/YoloV8/Parsers/SegmentationOutputParser.cs create mode 100644 Source/YoloV8/Parsing/IRawBoundingBox.cs create mode 100644 Source/YoloV8/Parsing/RawBoundingBox.cs create mode 100644 Source/YoloV8/Parsing/RawObbBoundingBox.cs create mode 100644 Source/YoloV8/Parsing/RawParsingContext.cs rename Source/YoloV8/Plotting/{ => Options}/ClassificationPlottingOptions.cs (100%) rename Source/YoloV8/Plotting/{ => Options}/DetectionPlottingOptions.cs (66%) rename Source/YoloV8/Plotting/{ => Options}/PlottingOptions.cs (96%) rename Source/YoloV8/Plotting/{ => Options}/PosePlottingOptions.cs (100%) rename Source/YoloV8/Plotting/{ => Options}/SegmentationPlottingOptions.cs (100%) create 
mode 100644 Source/YoloV8/Plotting/PlottingAsyncExtensions.cs delete mode 100644 Source/YoloV8/Plotting/PlottingAsyncOperationExtensions.cs rename Source/YoloV8/{Base/YoloV8PlottingExtensions.cs => Plotting/PredictorPlottingExtensions.cs} (57%) rename Source/YoloV8/{Timing/SpeedTimer.cs => Predictor/PredictorTimer.cs} (59%) create mode 100644 Source/YoloV8/Predictor/ServiceResolver.cs create mode 100644 Source/YoloV8/Predictor/YoloConfiguration.cs create mode 100644 Source/YoloV8/Predictor/YoloPredictor.cs create mode 100644 Source/YoloV8/Predictor/YoloPredictorAsyncExtensions.cs create mode 100644 Source/YoloV8/Predictor/YoloPredictorExtensions.cs create mode 100644 Source/YoloV8/Predictor/YoloPredictorOptions.cs delete mode 100644 Source/YoloV8/Selectors/BinarySelector.cs delete mode 100644 Source/YoloV8/Selectors/ImageSelector.cs delete mode 100644 Source/YoloV8/Selectors/ImageSelector{TPixel}.cs create mode 100644 Source/YoloV8/Services/MemoryAllocatorService.cs create mode 100644 Source/YoloV8/Services/NonMaxSuppressionService.cs create mode 100644 Source/YoloV8/Services/Parsers/ClassificationParser.cs create mode 100644 Source/YoloV8/Services/Parsers/DetectionParser.cs create mode 100644 Source/YoloV8/Services/Parsers/ObbDetectionParser.cs create mode 100644 Source/YoloV8/Services/Parsers/PoseParser.cs create mode 100644 Source/YoloV8/Services/Parsers/RawBoundingBoxParser.cs create mode 100644 Source/YoloV8/Services/Parsers/SegmentationParser.cs create mode 100644 Source/YoloV8/Services/PreprocessService.cs create mode 100644 Source/YoloV8/Services/SerssionRunnerService.cs delete mode 100644 Source/YoloV8/Utilities/NonMaxSuppressionHelper.cs delete mode 100644 Source/YoloV8/Utilities/ObbNonMaxSuppressionHelper.cs delete mode 100644 Source/YoloV8/Utilities/PreprocessHelper.cs create mode 100644 Source/YoloV8/Utilities/Vector.cs diff --git a/README.md b/README.md index b10dd6c..bd91bf3 100644 --- a/README.md +++ b/README.md @@ -1,69 +1,78 @@ -# YOLOv8 +# YoloV8 
-Use [YOLOv8](https://github.com/ultralytics/ultralytics) in real-time for object detection, instance segmentation, pose estimation and image classification, via [ONNX Runtime](https://github.com/microsoft/onnxruntime) +Integrate [YOLOv8](https://github.com/ultralytics/ultralytics) into your C# project for a variety of real-time tasks including object detection, instance segmentation, pose estimation and more, using ONNX Runtime. -# Install +# Features +- **YOLOv8 Tasks** 🌟 Support for all YOLOv8 tasks ([Detect](https://docs.ultralytics.com/tasks/detect), [Segment](https://docs.ultralytics.com/tasks/segment), [Classify](https://docs.ultralytics.com/tasks/classify), [Pose](https://docs.ultralytics.com/tasks/pose) and [OBB](https://docs.ultralytics.com/tasks/obb)) +- **High Performance** 🚀 Various techniques and use of .NET features to maximize performance +- **Reduced Memory Usage** 🧠 By reusing memory blocks and reducing the pressure on the GC +- **Plotting Options** 📊 Plotting operations for preview of model results on the target image. 
+- **YOLOv10 Support** 🔧 Includes additional support for [YOLOv10](https://docs.ultralytics.com/models/yolov10) -The `YoloV8` project is available in two nuget packages: [YoloV8](https://www.nuget.org/packages/YoloV8) and [YoloV8.Gpu](https://www.nuget.org/packages/YoloV8.Gpu), if you use with CPU add the [YoloV8](https://www.nuget.org/packages/YoloV8) package reference to your project (contains reference to [Microsoft.ML.OnnxRuntime](https://www.nuget.org/packages/Microsoft.ML.OnnxRuntime) package) +# Installation +This project provides two NuGet packages: +- For CPU inference, use the package: [YoloV8](https://www.nuget.org/packages/YoloV8) (includes the [Microsoft.ML.OnnxRuntime](https://www.nuget.org/packages/Microsoft.ML.OnnxRuntime) package) +- For GPU inference, use the package: [YoloV8.Gpu](https://www.nuget.org/packages/YoloV8.Gpu) (includes the [Microsoft.ML.OnnxRuntime.Gpu](https://www.nuget.org/packages/Microsoft.ML.OnnxRuntime.Gpu) package) -```shell -dotnet add package YoloV8 -``` +# Usage -If you use with GPU you can add the [YoloV8.Gpu](https://www.nuget.org/packages/YoloV8.Gpu) package reference (contains reference to [Microsoft.ML.OnnxRuntime.Gpu](https://www.nuget.org/packages/Microsoft.ML.OnnxRuntime.Gpu) package) - -```shell -dotnet add package YoloV8.Gpu -``` - -# Use - -### Export the model from PyTorch to ONNX format: - -Run this python code to export the model in ONNX format: +### 1. Export model to ONNX format: +To convert the pre-trained PyTorch model to ONNX format, run the following Python code: ```python from ultralytics import YOLO # Load a model -model = YOLO('path/to/best') +model = YOLO('path/to/best.pt') -# export the model to ONNX format +# Export the model to ONNX format model.export(format='onnx') ``` -### Use in exported model with C#: +### 2. 
Load the ONNX model with C#: +Add the `YoloV8` (or `YoloV8.Gpu`) package to your project: +```shell +dotnet add package YoloV8 +``` + +Use the following C# code to load the model and run a basic prediction: ```csharp using Compunet.YoloV8; -using SixLabors.ImageSharp; -using var predictor = YoloV8Predictor.Create("path/to/model"); +// Load the YOLOv8 predictor +using var predictor = new YoloPredictor("path/to/model.onnx"); -var result = predictor.Detect("path/to/image"); +// Run model +var result = predictor.Detect("path/to/image.jpg"); // or -var result = await predictor.DetectAsync("path/to/image"); +var result = await predictor.DetectAsync("path/to/image.jpg"); +// Write result summary to terminal Console.WriteLine(result); ``` - # Plotting -You can to plot the input image for preview the model prediction results, this code demonstrates how to perform a prediction, plot the results and save to file: +You can plot the target image to preview the model results. This code demonstrates how to run inference, plot the results on the image and save them to a file: ```csharp using Compunet.YoloV8; using Compunet.YoloV8.Plotting; using SixLabors.ImageSharp; -using var image = Image.Load("path/to/image"); +// Load the YOLOv8 predictor +using var predictor = new YoloPredictor("path/to/model.onnx"); -using var predictor = YoloV8Predictor.Create("path/to/model"); +// Load the target image +using var image = Image.Load("path/to/image"); +// Run model var result = await predictor.PoseAsync(image); +// Create plotted image from model results using var plotted = await result.PlotImageAsync(image); +// Write the plotted image to file plotted.Save("./pose_demo.jpg") ``` @@ -74,12 +83,13 @@ using Compunet.YoloV8; using Compunet.YoloV8.Plotting; using SixLabors.ImageSharp; +// Load the YOLOv8 predictor using var predictor = YoloV8Predictor.Create("path/to/model"); +// Run model, plot predictions and write to file predictor.PredictAndSaveAsync("path/to/image"); ``` - -## Demo Images: +## 
Example Images: #### Detection: @@ -96,3 +106,5 @@ predictor.PredictAndSaveAsync("path/to/image"); # License AGPL-3.0 License + +**Important Note:** This project depends on ImageSharp, you should check the license details [here](https://github.com/SixLabors/ImageSharp/blob/main/LICENSE) \ No newline at end of file diff --git a/Source/YoloV8.Demo/Program.cs b/Source/YoloV8.Demo/Program.cs index c028739..a627164 100644 --- a/Source/YoloV8.Demo/Program.cs +++ b/Source/YoloV8.Demo/Program.cs @@ -2,16 +2,18 @@ using System.Diagnostics; Console.WriteLine("Loading pose estimation model..."); -using var posePredictor = YoloV8Predictor.Create("./models/yolov8n-pose-uint8.onnx"); +using var posePredictor = new YoloPredictor("./models/yolov8n-pose-uint8.onnx"); Console.WriteLine("Loading detection model..."); -using var detectPredictor = YoloV8Predictor.Create("./models/yolov8n-uint8.onnx"); +using var detectPredictor = new YoloPredictor("./models/yolov8n-uint8.onnx"); Console.WriteLine("Loading segmentation model..."); -using var segmentPredictor = YoloV8Predictor.Create("./models/yolov8n-seg-uint8.onnx"); +using var segmentPredictor = new YoloPredictor("./models/yolov8n-seg-uint8.onnx"); Console.WriteLine("Loading classification model..."); -using var classifyPredictor = YoloV8Predictor.Create("./models/yolov8n-cls-uint8.onnx"); +using var classifyPredictor = new YoloPredictor("./models/yolov8n-cls-uint8.onnx"); + +Console.WriteLine(); await PredictAndSaveAsync(posePredictor, "bus.jpg"); await PredictAndSaveAsync(posePredictor, "sports.jpg"); @@ -34,15 +36,17 @@ }); } -static async Task PredictAndSaveAsync(YoloV8Predictor predictor, string image) +static async Task PredictAndSaveAsync(YoloPredictor predictor, string image) { var path = $"./images/{image}"; + var task = predictor.Metadata.Task; + + Console.WriteLine($"Running '{image}' (test: {task})..."); var result = await predictor.PredictAndSaveAsync(path); - Console.WriteLine(); - Console.WriteLine($"Task: 
{predictor.Metadata.Task}"); - Console.WriteLine($"Image: {image}"); Console.WriteLine($"Result: {result}"); Console.WriteLine($"Speed: {result.Speed}"); + + Console.WriteLine(); } \ No newline at end of file diff --git a/Source/YoloV8.Demo/YoloV8.Demo.csproj b/Source/YoloV8.Demo/YoloV8.Demo.csproj index f04bab2..02832d7 100644 --- a/Source/YoloV8.Demo/YoloV8.Demo.csproj +++ b/Source/YoloV8.Demo/YoloV8.Demo.csproj @@ -14,7 +14,7 @@ - + diff --git a/Source/YoloV8.Tests/NonMaxSuppressionTests.cs b/Source/YoloV8.Tests/NonMaxSuppressionTests.cs index 7608a01..3d1f904 100644 --- a/Source/YoloV8.Tests/NonMaxSuppressionTests.cs +++ b/Source/YoloV8.Tests/NonMaxSuppressionTests.cs @@ -1,46 +1,50 @@ -namespace YoloV8.Tests; +using Compunet.YoloV8.Parsing; + +namespace YoloV8.Tests; public class NonMaxSuppressionTests { [Fact] public void NonMaxSuppressionBasicTest() { - var classA = new YoloV8Class(0, "a"); - var classB = new YoloV8Class(1, "b"); + var nonMaxSuppression = new NonMaxSuppressionService(); + + var classA = new YoloName(0, "a"); + var classB = new YoloName(1, "b"); - IndexedBoundingBox[] boxes = + RawBoundingBox[] boxes = [ - new IndexedBoundingBox + new RawBoundingBox { Index = 0, - Class = classA, + Name = classA, Bounds = new Rectangle(0, 0, 50, 50), Confidence = .8f }, - new IndexedBoundingBox + new RawBoundingBox { Index = 1, - Class = classA, + Name = classA, Bounds = new Rectangle(0, 0, 50, 50), Confidence = .9f }, - new IndexedBoundingBox + new RawBoundingBox { Index = 2, - Class = classB, + Name = classB, Bounds = new Rectangle(0, 0, 50, 50), Confidence = .9f }, - new IndexedBoundingBox + new RawBoundingBox { Index = 3, - Class = classA, + Name = classA, Bounds = new Rectangle(50, 50, 50, 50), Confidence = .5f }, ]; - var selected = NonMaxSuppressionHelper.Suppress(boxes, .5f); + var selected = nonMaxSuppression.Suppress(boxes.AsSpan(), .5f); Assert.Equal([1, 2, 3], selected.Select(x => x.Index).Order()); } diff --git a/Source/YoloV8.Tests/Predictors.cs 
b/Source/YoloV8.Tests/Predictors.cs index 7f848c0..b7d869f 100644 --- a/Source/YoloV8.Tests/Predictors.cs +++ b/Source/YoloV8.Tests/Predictors.cs @@ -2,19 +2,19 @@ public static class Predictors { - public static readonly YoloV8Predictor Pose = YoloV8Predictor.Create("./models/yolov8n-pose-uint8.onnx"); - public static readonly YoloV8Predictor Detection = YoloV8Predictor.Create("./models/yolov8n-uint8.onnx"); - public static readonly YoloV8Predictor Segmentation = YoloV8Predictor.Create("./models/yolov8n-seg-uint8.onnx"); - public static readonly YoloV8Predictor Classification = YoloV8Predictor.Create("./models/yolov8n-cls-uint8.onnx"); + public static readonly YoloPredictor Pose = new("./models/yolov8n-pose-uint8.onnx"); + public static readonly YoloPredictor Detection = new("./models/yolov8n-uint8.onnx"); + public static readonly YoloPredictor Segmentation = new("./models/yolov8n-seg-uint8.onnx"); + public static readonly YoloPredictor Classification = new("./models/yolov8n-cls-uint8.onnx"); - public static YoloV8Predictor GetPredictor(YoloV8Task task) + public static YoloPredictor GetPredictor(YoloTask task) { return task switch { - YoloV8Task.Pose => Pose, - YoloV8Task.Detect => Detection, - YoloV8Task.Segment => Segmentation, - YoloV8Task.Classify => Classification, + YoloTask.Pose => Pose, + YoloTask.Detect => Detection, + YoloTask.Segment => Segmentation, + YoloTask.Classify => Classification, _ => throw new InvalidEnumArgumentException() }; } diff --git a/Source/YoloV8.Tests/Usings.cs b/Source/YoloV8.Tests/Usings.cs index 7e11d2d..fe62314 100644 --- a/Source/YoloV8.Tests/Usings.cs +++ b/Source/YoloV8.Tests/Usings.cs @@ -1,7 +1,7 @@ global using Compunet.YoloV8; global using Compunet.YoloV8.Metadata; -global using Compunet.YoloV8.Parsers; -global using Compunet.YoloV8.Utilities; +global using Compunet.YoloV8.Services; global using SixLabors.ImageSharp; global using System.ComponentModel; -global using Xunit; \ No newline at end of file +global using Xunit; + 
diff --git a/Source/YoloV8.Tests/YoloV8Tests.cs b/Source/YoloV8.Tests/YoloPredictorTests.cs similarity index 67% rename from Source/YoloV8.Tests/YoloV8Tests.cs rename to Source/YoloV8.Tests/YoloPredictorTests.cs index 7d290d0..9b6f808 100644 --- a/Source/YoloV8.Tests/YoloV8Tests.cs +++ b/Source/YoloV8.Tests/YoloPredictorTests.cs @@ -1,19 +1,19 @@ namespace YoloV8.Tests; -public class YoloV8Tests +public class YoloPredictorTests { [Theory] [InlineData("bus.jpg", 3)] [InlineData("sports.jpg", 3)] public void PoseTest(string image, int count) { - var predictor = Predictors.GetPredictor(YoloV8Task.Pose); + var predictor = Predictors.GetPredictor(YoloTask.Pose); image = GetImagePath(image); var result = predictor.Pose(image); - Assert.Equal(count, result.Boxes.Count()); + Assert.Equal(count, result.Count); } [Theory] @@ -21,7 +21,7 @@ public void PoseTest(string image, int count) [InlineData("sports.jpg", "person:2;sports ball:1;baseball bat:1;baseball glove:2")] public void DetectionTest(string image, string objects) { - var predictor = Predictors.GetPredictor(YoloV8Task.Detect); + var predictor = Predictors.GetPredictor(YoloTask.Detect); image = GetImagePath(image); @@ -39,11 +39,11 @@ public void DetectionTest(string image, string objects) list.Add((name, count)); } - Assert.Equal(list.Sum(x => x.count), result.Boxes.Length); + Assert.Equal(list.Sum(x => x.count), result.Count); foreach (var (name, count) in list) { - Assert.Equal(count, result.Boxes.Where(x => x.Class.Name == name).Count()); + Assert.Equal(count, result.Where(x => x.Name.Name == name).Count()); } } @@ -53,21 +53,21 @@ public void DetectionTest(string image, string objects) [InlineData("toaster.jpg", "toaster")] public void ClassificationTest(string image, string label) { - var predictor = Predictors.GetPredictor(YoloV8Task.Classify); + var predictor = Predictors.GetPredictor(YoloTask.Classify); image = GetImagePath(image); var result = predictor.Classify(image); - 
Assert.Equal(result.TopClass.Name.Name, label); + Assert.Equal(result[0].Name.Name, label); } [Theory] - [InlineData(YoloV8Task.Pose, 1, 640)] - [InlineData(YoloV8Task.Detect, 80, 640)] - [InlineData(YoloV8Task.Segment, 80, 640)] - [InlineData(YoloV8Task.Classify, 1000, 224)] - public void MetadataTest(YoloV8Task task, int classesCount, int imageSize) + [InlineData(YoloTask.Pose, 1, 640)] + [InlineData(YoloTask.Detect, 80, 640)] + [InlineData(YoloTask.Segment, 80, 640)] + [InlineData(YoloTask.Classify, 1000, 224)] + public void MetadataTest(YoloTask task, int classesCount, int imageSize) { var metadata = Predictors.GetPredictor(task).Metadata; @@ -77,12 +77,12 @@ public void MetadataTest(YoloV8Task task, int classesCount, int imageSize) Assert.Equal(task, metadata.Task); - Assert.Equal(1, metadata.Batch); + Assert.Equal(1, metadata.BatchSize); Assert.Equal(imageSize, metadata.ImageSize.Width); Assert.Equal(imageSize, metadata.ImageSize.Height); - Assert.Equal(classesCount, metadata.Names.Count); + Assert.Equal(classesCount, metadata.Names.Length); } private static string GetImagePath(string image) diff --git a/Source/YoloV8.Tests/YoloV8.Tests.csproj b/Source/YoloV8.Tests/YoloV8.Tests.csproj index 235fbd1..f28dbf3 100644 --- a/Source/YoloV8.Tests/YoloV8.Tests.csproj +++ b/Source/YoloV8.Tests/YoloV8.Tests.csproj @@ -13,7 +13,7 @@ - + diff --git a/Source/YoloV8/AssemblyAttributes.cs b/Source/YoloV8/AssemblyAttributes.cs deleted file mode 100644 index 252137e..0000000 --- a/Source/YoloV8/AssemblyAttributes.cs +++ /dev/null @@ -1,3 +0,0 @@ -using System.Runtime.CompilerServices; - -[assembly: InternalsVisibleTo("YoloV8.Tests")] \ No newline at end of file diff --git a/Source/YoloV8/Base/PostprocessContext.cs b/Source/YoloV8/Base/PostprocessContext.cs deleted file mode 100644 index 20d6b3e..0000000 --- a/Source/YoloV8/Base/PostprocessContext.cs +++ /dev/null @@ -1,5 +0,0 @@ -namespace Compunet.YoloV8; - -public delegate TResult PostprocessContext(IReadOnlyList outputs, - 
Size imageSize, - SpeedTimer timer) where TResult : YoloV8Result; \ No newline at end of file diff --git a/Source/YoloV8/Base/YoloV8Configuration.cs b/Source/YoloV8/Base/YoloV8Configuration.cs deleted file mode 100644 index 74cdb2a..0000000 --- a/Source/YoloV8/Base/YoloV8Configuration.cs +++ /dev/null @@ -1,22 +0,0 @@ -namespace Compunet.YoloV8; - -public class YoloV8Configuration -{ - public static readonly YoloV8Configuration Default = new(); - - public float Confidence { get; set; } - - public float IoU { get; set; } - - public bool KeepOriginalAspectRatio { get; set; } - - public bool SuppressParallelInference { get; set; } - - public YoloV8Configuration() - { - Confidence = .3f; - IoU = .45f; - KeepOriginalAspectRatio = true; - SuppressParallelInference = false; - } -} \ No newline at end of file diff --git a/Source/YoloV8/Base/YoloV8Extensions.cs b/Source/YoloV8/Base/YoloV8Extensions.cs deleted file mode 100644 index 30b8775..0000000 --- a/Source/YoloV8/Base/YoloV8Extensions.cs +++ /dev/null @@ -1,197 +0,0 @@ -namespace Compunet.YoloV8; - -public static partial class YoloV8Extensions -{ - public static PoseResult Pose(this YoloV8Predictor predictor, ImageSelector selector, YoloV8Configuration? configuration = null) - { - configuration ??= predictor.Configuration; - - predictor.ValidatePoseShape(); - - return predictor.Run(selector, (outputs, image, timer) => - { - var output = outputs[0].AsTensor(); - - var parser = new PoseOutputParser(predictor.Metadata, configuration); - - var boxes = parser.Parse(output, image); - - var speed = timer.Stop(); - - return new PoseResult - { - Boxes = boxes, - Image = image, - Speed = speed, - }; - }, configuration); - } - - public static DetectionResult Detect(this YoloV8Predictor predictor, ImageSelector selector, YoloV8Configuration? 
configuration = null) - { - configuration ??= predictor.Configuration; - - predictor.ValidateTask(YoloV8Task.Detect); - - return predictor.Run(selector, (outputs, image, timer) => - { - var output = outputs[0].AsTensor(); - - var parser = new DetectionOutputParser(predictor.Metadata, configuration); - - var boxes = parser.Parse(output, image); - - var speed = timer.Stop(); - - return new DetectionResult - { - Boxes = boxes, - Image = image, - Speed = speed, - }; - }, configuration); - } - - public static ObbDetectionResult DetectObb(this YoloV8Predictor predictor, ImageSelector selector, YoloV8Configuration? configuration = null) - { - configuration ??= predictor.Configuration; - - predictor.ValidateTask(YoloV8Task.Obb); - - return predictor.Run(selector, (outputs, image, timer) => - { - var output = outputs[0].AsTensor(); - - var parser = new ObbDetectionOutputParser(predictor.Metadata, configuration); - - var boxes = parser.Parse(output, image); - - var speed = timer.Stop(); - - return new ObbDetectionResult - { - Boxes = boxes, - Image = image, - Speed = speed, - }; - }, configuration); - } - - public static SegmentationResult Segment(this YoloV8Predictor predictor, ImageSelector selector, YoloV8Configuration? configuration = null) - { - configuration ??= predictor.Configuration; - - predictor.ValidateTask(YoloV8Task.Segment); - - return predictor.Run(selector, (outputs, image, timer) => - { - var parser = new SegmentationOutputParser(predictor.Metadata, configuration); - - var boxesOutput = outputs[0].AsTensor(); - var maskPrototypes = outputs[1].AsTensor(); - - var boxes = parser.Parse(boxesOutput, maskPrototypes, image); - - var speed = timer.Stop(); - - return new SegmentationResult - { - Boxes = boxes, - Image = image, - Speed = speed, - }; - }, configuration); - } - - public static ClassificationResult Classify(this YoloV8Predictor predictor, ImageSelector selector, YoloV8Configuration? 
configuration = null) - { - configuration ??= predictor.Configuration; - - predictor.ValidateTask(YoloV8Task.Classify); - - return predictor.Run(selector, (outputs, image, timer) => - { - var output = outputs[0].AsEnumerable().ToList(); - - var probs = new ClassProbability[output.Count]; - - for (int i = 0; i < output.Count; i++) - { - var name = predictor.Metadata.Names[i]; - var confidence = output[i]; - - probs[i] = new ClassProbability - { - Name = name, - Confidence = confidence, - }; - } - - var top = probs.MaxBy(x => x.Confidence) ?? throw new Exception(); - - var speed = timer.Stop(); - - return new ClassificationResult - { - TopClass = top, - Probabilities = probs, - Image = image, - Speed = speed, - }; - }, configuration); - } - - #region Async Operations - - public static async Task PoseAsync(this YoloV8Predictor predictor, ImageSelector selector, YoloV8Configuration? configuration = null) - { - return await Task.Run(() => predictor.Pose(selector, configuration)); - } - - public static async Task DetectAsync(this YoloV8Predictor predictor, ImageSelector selector, YoloV8Configuration? configuration = null) - { - return await Task.Run(() => predictor.Detect(selector, configuration)); - } - - public static async Task DetectObbAsync(this YoloV8Predictor predictor, ImageSelector selector, YoloV8Configuration? configuration = null) - { - return await Task.Run(() => predictor.DetectObb(selector, configuration)); - } - - public static async Task SegmentAsync(this YoloV8Predictor predictor, ImageSelector selector, YoloV8Configuration? configuration = null) - { - return await Task.Run(() => predictor.Segment(selector, configuration)); - } - - public static async Task ClassifyAsync(this YoloV8Predictor predictor, ImageSelector selector, YoloV8Configuration? 
configuration = null) - { - return await Task.Run(() => predictor.Classify(selector, configuration)); - } - - #endregion - - private static void ValidateTask(this YoloV8Predictor predictor, YoloV8Task task) - { - if (predictor.Metadata.Task != task) - { - throw new InvalidOperationException("The loaded model does not support this task"); - } - } - - private static void ValidatePoseShape(this YoloV8Predictor predictor) - { - predictor.ValidateTask(YoloV8Task.Pose); - - if (predictor.Metadata is YoloV8PoseMetadata metadata) - { - var shape = metadata.KeypointShape; - - if (shape.Channels is 2 or 3) - { - return; - } - } - - throw new NotSupportedException("The this keypoint shape is not supported"); - } -} \ No newline at end of file diff --git a/Source/YoloV8/Base/YoloV8Predictor.cs b/Source/YoloV8/Base/YoloV8Predictor.cs deleted file mode 100644 index 535f68e..0000000 --- a/Source/YoloV8/Base/YoloV8Predictor.cs +++ /dev/null @@ -1,107 +0,0 @@ -namespace Compunet.YoloV8; - -public class YoloV8Predictor : IDisposable -{ - private readonly InferenceSession _inference; - - private readonly object _locker = new(); - - private bool _disposed; - - public YoloV8Metadata Metadata { get; } - - public YoloV8Configuration Configuration { get; } - - public static YoloV8Predictor Create(BinarySelector model) => YoloV8Builder.CreateDefaultBuilder().UseOnnxModel(model).Build(); - - internal YoloV8Predictor(BinarySelector model, YoloV8Metadata? metadata, YoloV8Configuration? configuration, SessionOptions? options) - { - _inference = new InferenceSession(model.Load(), options ?? new SessionOptions()); - - Metadata = metadata ?? YoloV8Metadata.Parse(_inference.ModelMetadata.CustomMetadataMap); - Configuration = configuration ?? YoloV8Configuration.Default; - } - - public TResult Run(ImageSelector selector, PostprocessContext postprocess, YoloV8Configuration? 
configuration = null) where TResult : YoloV8Result - { - configuration ??= Configuration; - - using var image = selector.Load(true); - - var originSize = image.Size; - - var timer = new SpeedTimer(); - - timer.StartPreprocess(); - - var input = Preprocess(image, configuration); - - var inputs = CreateInputAndMapNames([input]); - - timer.StartInference(); - - using var outputs = Infer(inputs, configuration); - - var list = new List(outputs); - - timer.StartPostprocess(); - - return postprocess(list, originSize, timer); - } - - private IDisposableReadOnlyCollection Infer(IReadOnlyCollection inputs, YoloV8Configuration configuration) - { - if (configuration.SuppressParallelInference) - { - lock (_locker) - { - return _inference.Run(inputs); - } - } - - return _inference.Run(inputs); - } - - private Tensor Preprocess(Image image, YoloV8Configuration configuration) - { - var modelSize = Metadata.ImageSize; - - var dimensions = new int[] { 1, 3, modelSize.Height, modelSize.Width }; - var input = new DenseTensor(dimensions); - - PreprocessHelper.ProcessToTensor(image, modelSize, configuration.KeepOriginalAspectRatio, input, 0); - - return input; - } - - private NamedOnnxValue[] CreateInputAndMapNames(ReadOnlySpan> inputs) - { - var length = inputs.Length; - - var values = new NamedOnnxValue[length]; - - for (int i = 0; i < length; i++) - { - var name = _inference.InputNames[i]; - - var value = NamedOnnxValue.CreateFromTensor(name, inputs[i]); - - values[i] = value; - } - - return values; - } - - public void Dispose() - { - if (_disposed) - { - return; - } - - _inference.Dispose(); - _disposed = true; - - GC.SuppressFinalize(this); - } -} \ No newline at end of file diff --git a/Source/YoloV8/Builder/IYoloV8Builder.cs b/Source/YoloV8/Builder/IYoloV8Builder.cs deleted file mode 100644 index b8dc870..0000000 --- a/Source/YoloV8/Builder/IYoloV8Builder.cs +++ /dev/null @@ -1,29 +0,0 @@ -namespace Compunet.YoloV8; - -public interface IYoloV8Builder -{ - public IYoloV8Builder 
UseOnnxModel(BinarySelector model); - -#if GPURELEASE - - public IYoloV8Builder UseCuda(int deviceId = 0); - public IYoloV8Builder UseCuda(OrtCUDAProviderOptions options); - - public IYoloV8Builder UseRocm(int deviceId = 0); - public IYoloV8Builder UseRocm(OrtROCMProviderOptions options); - - public IYoloV8Builder UseTensorrt(int deviceId = 0); - public IYoloV8Builder UseTensorrt(OrtTensorRTProviderOptions options); - - public IYoloV8Builder UseTvm(string settings = ""); - -#endif - - public IYoloV8Builder WithMetadata(YoloV8Metadata metadata); - - public IYoloV8Builder WithConfiguration(Action configure); - - public IYoloV8Builder WithSessionOptions(SessionOptions sessionOptions); - - public YoloV8Predictor Build(); -} \ No newline at end of file diff --git a/Source/YoloV8/Builder/YoloV8Builder.cs b/Source/YoloV8/Builder/YoloV8Builder.cs deleted file mode 100644 index c105426..0000000 --- a/Source/YoloV8/Builder/YoloV8Builder.cs +++ /dev/null @@ -1,79 +0,0 @@ -namespace Compunet.YoloV8; - -public class YoloV8Builder : IYoloV8Builder -{ - private BinarySelector? _model; - - private SessionOptions? _sessionOptions; - - private YoloV8Metadata? _metadata; - private YoloV8Configuration? 
_configuration; - - public static IYoloV8Builder CreateDefaultBuilder() - { - var builder = new YoloV8Builder(); - -#if GPURELEASE - builder.UseCuda(0); -#endif - - return builder; - } - - public YoloV8Predictor Build() - { - if (_model is null) - { - throw new ApplicationException("No model selected"); - } - - return new YoloV8Predictor(_model, _metadata, _configuration, _sessionOptions); - } - - public IYoloV8Builder UseOnnxModel(BinarySelector model) - { - _model = model; - - return this; - } - -#if GPURELEASE - - public IYoloV8Builder UseCuda(int deviceId) => WithSessionOptions(SessionOptions.MakeSessionOptionWithCudaProvider(deviceId)); - public IYoloV8Builder UseCuda(OrtCUDAProviderOptions options) => WithSessionOptions(SessionOptions.MakeSessionOptionWithCudaProvider(options)); - - public IYoloV8Builder UseRocm(int deviceId) => WithSessionOptions(SessionOptions.MakeSessionOptionWithRocmProvider(deviceId)); - public IYoloV8Builder UseRocm(OrtROCMProviderOptions options) => WithSessionOptions(SessionOptions.MakeSessionOptionWithRocmProvider(options)); - - public IYoloV8Builder UseTensorrt(int deviceId) => WithSessionOptions(SessionOptions.MakeSessionOptionWithTensorrtProvider(deviceId)); - public IYoloV8Builder UseTensorrt(OrtTensorRTProviderOptions options) => WithSessionOptions(SessionOptions.MakeSessionOptionWithTensorrtProvider(options)); - - public IYoloV8Builder UseTvm(string settings = "") => WithSessionOptions(SessionOptions.MakeSessionOptionWithTvmProvider(settings)); - -#endif - - public IYoloV8Builder WithMetadata(YoloV8Metadata metadata) - { - _metadata = metadata; - - return this; - } - - public IYoloV8Builder WithConfiguration(Action configure) - { - var configuration = new YoloV8Configuration(); - - configure(configuration); - - _configuration = configuration; - - return this; - } - - public IYoloV8Builder WithSessionOptions(SessionOptions sessionOptions) - { - _sessionOptions = sessionOptions; - - return this; - } -} \ No newline at end of file 
diff --git a/Source/YoloV8/Contracts/Services/IMemoryAllocatorService.cs b/Source/YoloV8/Contracts/Services/IMemoryAllocatorService.cs new file mode 100644 index 0000000..e207f8c --- /dev/null +++ b/Source/YoloV8/Contracts/Services/IMemoryAllocatorService.cs @@ -0,0 +1,6 @@ +namespace Compunet.YoloV8.Contracts.Services; + +internal interface IMemoryAllocatorService +{ + public IMemoryOwner Allocate(int length, bool clean = false); +} \ No newline at end of file diff --git a/Source/YoloV8/Contracts/Services/INonMaxSupressionService.cs b/Source/YoloV8/Contracts/Services/INonMaxSupressionService.cs new file mode 100644 index 0000000..2b7a3c8 --- /dev/null +++ b/Source/YoloV8/Contracts/Services/INonMaxSupressionService.cs @@ -0,0 +1,6 @@ +namespace Compunet.YoloV8.Contracts.Services; + +internal interface INonMaxSuppressionService +{ + public T[] Suppress(Span boxes, float iouThreshold) where T : IRawBoundingBox; +} \ No newline at end of file diff --git a/Source/YoloV8/Contracts/Services/IPreprocessService.cs b/Source/YoloV8/Contracts/Services/IPreprocessService.cs new file mode 100644 index 0000000..a0404c6 --- /dev/null +++ b/Source/YoloV8/Contracts/Services/IPreprocessService.cs @@ -0,0 +1,6 @@ +namespace Compunet.YoloV8.Contracts.Services; + +internal interface IPreprocessService +{ + public void ProcessImageToTensor(Image image, DenseTensor tensor, Vector padding); +} \ No newline at end of file diff --git a/Source/YoloV8/Contracts/Services/ISessionRunnerService.cs b/Source/YoloV8/Contracts/Services/ISessionRunnerService.cs new file mode 100644 index 0000000..be063c9 --- /dev/null +++ b/Source/YoloV8/Contracts/Services/ISessionRunnerService.cs @@ -0,0 +1,6 @@ +namespace Compunet.YoloV8.Contracts.Services; + +internal interface ISessionRunnerService +{ + public YoloRawOutput PreprocessAndRun(Image image, out PredictorTimer timer); +} \ No newline at end of file diff --git a/Source/YoloV8/Contracts/Services/Parsing/IParser{T}.cs 
b/Source/YoloV8/Contracts/Services/Parsing/IParser{T}.cs new file mode 100644 index 0000000..78ec6e5 --- /dev/null +++ b/Source/YoloV8/Contracts/Services/Parsing/IParser{T}.cs @@ -0,0 +1,6 @@ +namespace Compunet.YoloV8.Contracts.Services; + +internal interface IParser +{ + public T[] ProcessTensorToResult(YoloRawOutput output, Size size); +} \ No newline at end of file diff --git a/Source/YoloV8/Contracts/Services/Parsing/IRawBoundingBoxParser.cs b/Source/YoloV8/Contracts/Services/Parsing/IRawBoundingBoxParser.cs new file mode 100644 index 0000000..2287d2a --- /dev/null +++ b/Source/YoloV8/Contracts/Services/Parsing/IRawBoundingBoxParser.cs @@ -0,0 +1,10 @@ +namespace Compunet.YoloV8.Contracts.Services; + +internal interface IRawBoundingBoxParser +{ + public T[] Parse(DenseTensor tensor, Size imageSize) where T : IRawBoundingBox; + + public T[] Parse(DenseTensor tensor, Size imageSize, Vector padding) where T : IRawBoundingBox; + + public T[] Parse(DenseTensor tensor, Vector padding, Vector ratio) where T : IRawBoundingBox; +} \ No newline at end of file diff --git a/Source/YoloV8/Data/BoundingBox.cs b/Source/YoloV8/Data/BoundingBox.cs deleted file mode 100644 index 11851d1..0000000 --- a/Source/YoloV8/Data/BoundingBox.cs +++ /dev/null @@ -1,15 +0,0 @@ -namespace Compunet.YoloV8.Data; - -public class BoundingBox -{ - public required YoloV8Class Class { get; init; } - - public required Rectangle Bounds { get; init; } - - public required float Confidence { get; init; } - - public override string ToString() - { - return $"{Class.Name} ({Confidence:N})"; - } -} \ No newline at end of file diff --git a/Source/YoloV8/Data/Classification.cs b/Source/YoloV8/Data/Classification.cs new file mode 100644 index 0000000..3bc8961 --- /dev/null +++ b/Source/YoloV8/Data/Classification.cs @@ -0,0 +1,7 @@ + +namespace Compunet.YoloV8.Data; + +public class Classification : YoloPrediction, IYoloPrediction +{ + static string IYoloPrediction.Describe(Classification[] predictions) => 
predictions[0].ToString(); +} \ No newline at end of file diff --git a/Source/YoloV8/Data/ClassificationResult.cs b/Source/YoloV8/Data/ClassificationResult.cs deleted file mode 100644 index 8c7127b..0000000 --- a/Source/YoloV8/Data/ClassificationResult.cs +++ /dev/null @@ -1,13 +0,0 @@ -namespace Compunet.YoloV8.Data; - -public class ClassificationResult : YoloV8Result -{ - public required ClassProbability TopClass { get; init; } - - public required ClassProbability[] Probabilities { get; init; } - - public override string ToString() - { - return TopClass.ToString(); - } -} \ No newline at end of file diff --git a/Source/YoloV8/Data/Detection.cs b/Source/YoloV8/Data/Detection.cs new file mode 100644 index 0000000..031a7a6 --- /dev/null +++ b/Source/YoloV8/Data/Detection.cs @@ -0,0 +1,8 @@ +namespace Compunet.YoloV8.Data; + +public class Detection : YoloPrediction, IYoloPrediction +{ + public required Rectangle Bounds { get; init; } + + static string IYoloPrediction.Describe(Detection[] predictions) => predictions.Summary(); +} \ No newline at end of file diff --git a/Source/YoloV8/Data/DetectionResult.cs b/Source/YoloV8/Data/DetectionResult.cs deleted file mode 100644 index 30a2410..0000000 --- a/Source/YoloV8/Data/DetectionResult.cs +++ /dev/null @@ -1,8 +0,0 @@ -namespace Compunet.YoloV8.Data; - -public class DetectionResult : YoloV8Result -{ - public required BoundingBox[] Boxes { get; init; } - - public override string ToString() => Boxes.Summary(); -} \ No newline at end of file diff --git a/Source/YoloV8/Data/IYoloPrediction{TSelf}.cs b/Source/YoloV8/Data/IYoloPrediction{TSelf}.cs new file mode 100644 index 0000000..e2389be --- /dev/null +++ b/Source/YoloV8/Data/IYoloPrediction{TSelf}.cs @@ -0,0 +1,6 @@ +namespace Compunet.YoloV8.Data; + +public interface IYoloPrediction +{ + internal abstract static string Describe(TSelf[] predictions); +} \ No newline at end of file diff --git a/Source/YoloV8/Data/ObbBoundingBox.cs b/Source/YoloV8/Data/ObbBoundingBox.cs 
deleted file mode 100644 index d7409fc..0000000 --- a/Source/YoloV8/Data/ObbBoundingBox.cs +++ /dev/null @@ -1,6 +0,0 @@ -namespace Compunet.YoloV8.Data; - -public class ObbBoundingBox : BoundingBox -{ - public required float Angle { get; init; } -} \ No newline at end of file diff --git a/Source/YoloV8/Data/ObbDetection.cs b/Source/YoloV8/Data/ObbDetection.cs new file mode 100644 index 0000000..0af9459 --- /dev/null +++ b/Source/YoloV8/Data/ObbDetection.cs @@ -0,0 +1,8 @@ +namespace Compunet.YoloV8.Data; + +public class ObbDetection : Detection, IYoloPrediction +{ + public required float Angle { get; init; } + + static string IYoloPrediction.Describe(ObbDetection[] predictions) => predictions.Summary(); +} \ No newline at end of file diff --git a/Source/YoloV8/Data/ObbDetectionResult.cs b/Source/YoloV8/Data/ObbDetectionResult.cs deleted file mode 100644 index 6b4939e..0000000 --- a/Source/YoloV8/Data/ObbDetectionResult.cs +++ /dev/null @@ -1,8 +0,0 @@ -namespace Compunet.YoloV8.Data; - -public class ObbDetectionResult : YoloV8Result -{ - public required ObbBoundingBox[] Boxes { get; init; } - - public override string ToString() => Boxes.Summary(); -} \ No newline at end of file diff --git a/Source/YoloV8/Data/Pose.cs b/Source/YoloV8/Data/Pose.cs new file mode 100644 index 0000000..b22a699 --- /dev/null +++ b/Source/YoloV8/Data/Pose.cs @@ -0,0 +1,22 @@ +namespace Compunet.YoloV8.Data; + +public class Pose(Keypoint[] keypoints) : Detection, IYoloPrediction, IEnumerable +{ + public Keypoint this[int index] => keypoints[index]; + + static string IYoloPrediction.Describe(Pose[] predictions) => predictions.Summary(); + + #region Enumerator + + public IEnumerator GetEnumerator() + { + foreach (var item in keypoints) + { + yield return item; + } + } + + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); + + #endregion +} \ No newline at end of file diff --git a/Source/YoloV8/Data/PoseBoundingBox.cs b/Source/YoloV8/Data/PoseBoundingBox.cs deleted file mode 100644 
index 69a87af..0000000 --- a/Source/YoloV8/Data/PoseBoundingBox.cs +++ /dev/null @@ -1,11 +0,0 @@ -namespace Compunet.YoloV8.Data; - -public class PoseBoundingBox : BoundingBox -{ - public required Keypoint[] Keypoints { get; init; } - - public Keypoint? GetKeypoint(int index) - { - return Keypoints.SingleOrDefault(x => x.Index == index); - } -} \ No newline at end of file diff --git a/Source/YoloV8/Data/PoseResult.cs b/Source/YoloV8/Data/PoseResult.cs deleted file mode 100644 index c5c8caa..0000000 --- a/Source/YoloV8/Data/PoseResult.cs +++ /dev/null @@ -1,8 +0,0 @@ -namespace Compunet.YoloV8.Data; - -public class PoseResult : YoloV8Result -{ - public required PoseBoundingBox[] Boxes { get; init; } - - public override string ToString() => Boxes.Summary(); -} \ No newline at end of file diff --git a/Source/YoloV8/Data/Segmentation.cs b/Source/YoloV8/Data/Segmentation.cs new file mode 100644 index 0000000..0a722a9 --- /dev/null +++ b/Source/YoloV8/Data/Segmentation.cs @@ -0,0 +1,8 @@ +namespace Compunet.YoloV8.Data; + +public class Segmentation : Detection, IYoloPrediction +{ + public required SegmentationMask Mask { get; init; } + + static string IYoloPrediction.Describe(Segmentation[] predictions) => predictions.Summary(); +} \ No newline at end of file diff --git a/Source/YoloV8/Data/SegmentationBoundingBox.cs b/Source/YoloV8/Data/SegmentationBoundingBox.cs deleted file mode 100644 index cef574c..0000000 --- a/Source/YoloV8/Data/SegmentationBoundingBox.cs +++ /dev/null @@ -1,6 +0,0 @@ -namespace Compunet.YoloV8.Data; - -public class SegmentationBoundingBox : BoundingBox -{ - public required SegmentationMask Mask { get; init; } -} \ No newline at end of file diff --git a/Source/YoloV8/Data/Mask.cs b/Source/YoloV8/Data/SegmentationMask.cs similarity index 100% rename from Source/YoloV8/Data/Mask.cs rename to Source/YoloV8/Data/SegmentationMask.cs diff --git a/Source/YoloV8/Data/SegmentationResult.cs b/Source/YoloV8/Data/SegmentationResult.cs deleted file mode 
100644 index 8146127..0000000 --- a/Source/YoloV8/Data/SegmentationResult.cs +++ /dev/null @@ -1,8 +0,0 @@ -namespace Compunet.YoloV8.Data; - -public class SegmentationResult : YoloV8Result -{ - public required SegmentationBoundingBox[] Boxes { get; init; } - - public override string ToString() => Boxes.Summary(); -} \ No newline at end of file diff --git a/Source/YoloV8/Timing/SpeedResult.cs b/Source/YoloV8/Data/SpeedResult.cs similarity index 93% rename from Source/YoloV8/Timing/SpeedResult.cs rename to Source/YoloV8/Data/SpeedResult.cs index 07fec15..31c485f 100644 --- a/Source/YoloV8/Timing/SpeedResult.cs +++ b/Source/YoloV8/Data/SpeedResult.cs @@ -1,4 +1,4 @@ -namespace Compunet.YoloV8.Timing; +namespace Compunet.YoloV8.Data; public readonly struct SpeedResult(TimeSpan preprocess, TimeSpan inference, diff --git a/Source/YoloV8/Data/ClassProbability.cs b/Source/YoloV8/Data/YoloPrediction.cs similarity index 68% rename from Source/YoloV8/Data/ClassProbability.cs rename to Source/YoloV8/Data/YoloPrediction.cs index 1db9a38..6d92aa8 100644 --- a/Source/YoloV8/Data/ClassProbability.cs +++ b/Source/YoloV8/Data/YoloPrediction.cs @@ -1,8 +1,8 @@ namespace Compunet.YoloV8.Data; -public class ClassProbability +public abstract class YoloPrediction { - public required YoloV8Class Name { get; init; } + public required YoloName Name { get; init; } public required float Confidence { get; init; } diff --git a/Source/YoloV8/Data/YoloPredictionExtensions.cs b/Source/YoloV8/Data/YoloPredictionExtensions.cs new file mode 100644 index 0000000..b014c75 --- /dev/null +++ b/Source/YoloV8/Data/YoloPredictionExtensions.cs @@ -0,0 +1,6 @@ +namespace Compunet.YoloV8.Data; + +public static class YoloPredictionExtensions +{ + public static Classification GetTopClass(this YoloResult result) => result[0]; +} \ No newline at end of file diff --git a/Source/YoloV8/Data/YoloV8Result.cs b/Source/YoloV8/Data/YoloResult.cs similarity index 53% rename from Source/YoloV8/Data/YoloV8Result.cs rename 
to Source/YoloV8/Data/YoloResult.cs index d098c7d..cf3e643 100644 --- a/Source/YoloV8/Data/YoloV8Result.cs +++ b/Source/YoloV8/Data/YoloResult.cs @@ -1,8 +1,8 @@ namespace Compunet.YoloV8.Data; -public abstract class YoloV8Result +public class YoloResult { - public required Size Image { get; init; } + public required Size ImageSize { get; init; } public required SpeedResult Speed { get; init; } } \ No newline at end of file diff --git a/Source/YoloV8/Data/YoloResult{T}.cs b/Source/YoloV8/Data/YoloResult{T}.cs new file mode 100644 index 0000000..84d624b --- /dev/null +++ b/Source/YoloV8/Data/YoloResult{T}.cs @@ -0,0 +1,24 @@ +namespace Compunet.YoloV8.Data; + +public class YoloResult(TPrediction[] predictions) : YoloResult, IEnumerable where TPrediction : IYoloPrediction +{ + public TPrediction this[int index] => predictions[index]; + + public int Count => predictions.Length; + + public override string ToString() => TPrediction.Describe(predictions); + + #region Enumerator + + public IEnumerator GetEnumerator() + { + foreach (var item in predictions) + { + yield return item; + } + } + + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); + + #endregion +} \ No newline at end of file diff --git a/Source/YoloV8/Extensions/BoundingBoxesExtensions.cs b/Source/YoloV8/Extensions/DetectionBoxesExtensions.cs similarity index 61% rename from Source/YoloV8/Extensions/BoundingBoxesExtensions.cs rename to Source/YoloV8/Extensions/DetectionBoxesExtensions.cs index 09f6ee3..cf169bf 100644 --- a/Source/YoloV8/Extensions/BoundingBoxesExtensions.cs +++ b/Source/YoloV8/Extensions/DetectionBoxesExtensions.cs @@ -1,10 +1,10 @@ namespace Compunet.YoloV8.Extensions; -internal static class BoundingBoxesExtensions +internal static class DetectionBoxesExtensions { - public static string Summary(this IEnumerable boxes) + public static string Summary(this IEnumerable boxes) { - var sort = boxes.Select(x => x.Class) + var sort = boxes.Select(x => x.Name) .GroupBy(x => x.Id) .OrderBy(x 
=> x.Key) .Select(x => $"{x.Count()} {x.First().Name}"); diff --git a/Source/YoloV8/Extensions/ImageSharpExtensions.cs b/Source/YoloV8/Extensions/ImageSharpExtensions.cs index a30b370..202d45f 100644 --- a/Source/YoloV8/Extensions/ImageSharpExtensions.cs +++ b/Source/YoloV8/Extensions/ImageSharpExtensions.cs @@ -2,38 +2,15 @@ internal static class ImageSharpExtensions { - public static void EnumeratePixels(this Image image, Action iterator) where TPixel : unmanaged, IPixel + public static Image As(this Image image) where TPixel : unmanaged, IPixel { - var width = image.Width; - var height = image.Height; - - if (image.DangerousTryGetSinglePixelMemory(out var memory)) + if (image is Image result) { - Parallel.For(0, width * height, index => - { - int x = index % width; - int y = index / width; - - var point = new Point(x, y); - var pixel = memory.Span[index]; - - iterator(point, pixel); - }); + return result; } - else - { - Parallel.For(0, image.Height, y => - { - var row = image.DangerousGetPixelRowMemory(y).Span; - for (int x = 0; x < image.Width; x++) - { - var point = new Point(x, y); - var pixel = row[x]; - - iterator(point, pixel); - } - }); - } + return image.CloneAs(); } + + public static void AutoOrient(this Image image) => image.Mutate(x => x.AutoOrient()); } \ No newline at end of file diff --git a/Source/YoloV8/Extensions/OrientedBoundingBoxExtensions.cs b/Source/YoloV8/Extensions/OrientedBoundingBoxExtensions.cs index 305cc2d..9e699f0 100644 --- a/Source/YoloV8/Extensions/OrientedBoundingBoxExtensions.cs +++ b/Source/YoloV8/Extensions/OrientedBoundingBoxExtensions.cs @@ -2,22 +2,22 @@ internal static class OrientedBoundingBoxExtensions { - public static Point[] GetCornerPoints(this ObbBoundingBox obb) + public static Point[] GetCornerPoints(this ObbDetection obb) { return GetCornerPoints(obb.Bounds, obb.Angle); } - public static Point[] GetCornerPoints(this ObbIndexedBoundingBox obb) + public static Point[] GetCornerPoints(this RawObbBoundingBox obb) { 
return GetCornerPoints(obb.Bounds, obb.Angle); } - private static Point[] GetCornerPoints(Rectangle bounds, float _angle) + private static Point[] GetCornerPoints(Rectangle bounds, float angle) { - var angle = _angle * Math.PI / 180.0; // Radians + var _angle = angle * MathF.PI / 180.0f; // Radians - var b = (float)Math.Cos(angle) * 0.5f; - var a = (float)Math.Sin(angle) * 0.5f; + var b = MathF.Cos(_angle) * .5f; + var a = MathF.Sin(_angle) * .5f; var x = bounds.X; var y = bounds.Y; @@ -26,17 +26,17 @@ private static Point[] GetCornerPoints(Rectangle bounds, float _angle) var points = new Point[4]; - points[0].X = (int)Math.Round(x - a * h - b * w, 0); - points[0].Y = (int)Math.Round(y + b * h - a * w, 0); + points[0].X = (int)MathF.Round(x - a * h - b * w, 0); + points[0].Y = (int)MathF.Round(y + b * h - a * w, 0); - points[1].X = (int)Math.Round(x + a * h - b * w, 0); - points[1].Y = (int)Math.Round(y - b * h - a * w, 0); + points[1].X = (int)MathF.Round(x + a * h - b * w, 0); + points[1].Y = (int)MathF.Round(y - b * h - a * w, 0); - points[2].X = (int)Math.Round(2d * x - points[0].X, 0); - points[2].Y = (int)Math.Round(2d * y - points[0].Y, 0); + points[2].X = (int)MathF.Round(2f * x - points[0].X, 0); + points[2].Y = (int)MathF.Round(2f * y - points[0].Y, 0); - points[3].X = (int)Math.Round(2d * x - points[1].X, 0); - points[3].Y = (int)Math.Round(2d * y - points[1].Y, 0); + points[3].X = (int)MathF.Round(2f * x - points[1].X, 0); + points[3].Y = (int)MathF.Round(2f * y - points[1].Y, 0); // Calculate the distances of each point from the origin (0, 0) var distance1 = Math.Sqrt(Math.Pow(points[0].X, 2) + Math.Pow(points[0].Y, 2)); diff --git a/Source/YoloV8/Global/Assembly.cs b/Source/YoloV8/Global/Assembly.cs new file mode 100644 index 0000000..fce4367 --- /dev/null +++ b/Source/YoloV8/Global/Assembly.cs @@ -0,0 +1 @@ +[assembly: InternalsVisibleTo("YoloV8.Tests")] \ No newline at end of file diff --git a/Source/YoloV8/Usings.cs b/Source/YoloV8/Global/Usings.cs 
similarity index 67% rename from Source/YoloV8/Usings.cs rename to Source/YoloV8/Global/Usings.cs index c9b22c0..5a8c31e 100644 --- a/Source/YoloV8/Usings.cs +++ b/Source/YoloV8/Global/Usings.cs @@ -1,11 +1,14 @@ global using Clipper2Lib; +global using Compunet.YoloV8.Contracts.Services; global using Compunet.YoloV8.Data; global using Compunet.YoloV8.Extensions; +global using Compunet.YoloV8.Memory; global using Compunet.YoloV8.Metadata; -global using Compunet.YoloV8.Parsers; +global using Compunet.YoloV8.Parsing; global using Compunet.YoloV8.Plotting; -global using Compunet.YoloV8.Timing; +global using Compunet.YoloV8.Services; global using Compunet.YoloV8.Utilities; +global using Microsoft.Extensions.DependencyInjection; global using Microsoft.ML.OnnxRuntime; global using Microsoft.ML.OnnxRuntime.Tensors; global using SixLabors.Fonts; @@ -13,8 +16,11 @@ global using SixLabors.ImageSharp.Advanced; global using SixLabors.ImageSharp.Drawing; global using SixLabors.ImageSharp.Drawing.Processing; +global using SixLabors.ImageSharp.Formats; global using SixLabors.ImageSharp.PixelFormats; global using SixLabors.ImageSharp.Processing; global using System.Buffers; +global using System.Collections; global using System.Data; global using System.Diagnostics; +global using System.Runtime.CompilerServices; diff --git a/Source/YoloV8/Memory/DenseTensorOwner.cs b/Source/YoloV8/Memory/DenseTensorOwner.cs new file mode 100644 index 0000000..243e892 --- /dev/null +++ b/Source/YoloV8/Memory/DenseTensorOwner.cs @@ -0,0 +1,24 @@ +namespace Compunet.YoloV8.Memory; + +internal class DenseTensorOwner(IMemoryOwner owner, ReadOnlySpan dimensions) : IDisposable +{ + private DenseTensor? 
_tensor = new(owner.Memory, dimensions); + + public DenseTensor Tensor + { + get + { + ObjectDisposedException.ThrowIf(_tensor is null, this); + return _tensor; + } + } + + public void Dispose() + { + if (_tensor != null) + { + _tensor = null; + owner.Dispose(); + } + } +} \ No newline at end of file diff --git a/Source/YoloV8/Memory/MemoryAllocatorExtensions.cs b/Source/YoloV8/Memory/MemoryAllocatorExtensions.cs new file mode 100644 index 0000000..ccf164c --- /dev/null +++ b/Source/YoloV8/Memory/MemoryAllocatorExtensions.cs @@ -0,0 +1,11 @@ +namespace Compunet.YoloV8.Memory; + +internal static class MemoryAllocatorExtensions +{ + public static DenseTensorOwner AllocateTensor(this IMemoryAllocatorService allocator, TensorShape shape, bool clean = false) + { + var memory = allocator.Allocate(shape.Length, clean); + + return new DenseTensorOwner(memory, shape.Dimensions); + } +} \ No newline at end of file diff --git a/Source/YoloV8/Memory/TensorShape.cs b/Source/YoloV8/Memory/TensorShape.cs new file mode 100644 index 0000000..a6e64ce --- /dev/null +++ b/Source/YoloV8/Memory/TensorShape.cs @@ -0,0 +1,29 @@ +namespace Compunet.YoloV8.Memory; + +internal readonly struct TensorShape(int[] shape) +{ + public int Length { get; } = GetSizeForShape(shape); + + public int[] Dimensions { get; } = shape; + + public long[] Dimensions64 { get; } = [.. 
shape.Select(x => (long)x)]; + + private static int GetSizeForShape(ReadOnlySpan shape) + { + var product = 1; + + for (var i = 0; i < shape.Length; i++) + { + var dimension = shape[i]; + + if (dimension < 0) + { + throw new ArgumentOutOfRangeException($"Shape must not have negative elements: {dimension}"); + } + + product = checked(product * dimension); + } + + return product; + } +} diff --git a/Source/YoloV8/Memory/YoloRawOutput.cs b/Source/YoloV8/Memory/YoloRawOutput.cs new file mode 100644 index 0000000..97f7a37 --- /dev/null +++ b/Source/YoloV8/Memory/YoloRawOutput.cs @@ -0,0 +1,42 @@ +namespace Compunet.YoloV8.Memory; + +internal class YoloRawOutput(DenseTensorOwner output0, DenseTensorOwner? output1) : IDisposable +{ + private bool _disposed; + + public DenseTensor Output0 + { + get + { + EnsureNotDisposed(); + return output0.Tensor; + } + } + + public DenseTensor? Output1 + { + get + { + EnsureNotDisposed(); + return output1?.Tensor; + } + } + + public void Dispose() + { + if (_disposed) + { + return; + } + + output0.Dispose(); + output1?.Dispose(); + + _disposed = true; + } + + private void EnsureNotDisposed() + { + ObjectDisposedException.ThrowIf(_disposed, this); + } +} \ No newline at end of file diff --git a/Source/YoloV8/Metadata/SessionTensorInfo.cs b/Source/YoloV8/Metadata/SessionTensorInfo.cs new file mode 100644 index 0000000..6e45d21 --- /dev/null +++ b/Source/YoloV8/Metadata/SessionTensorInfo.cs @@ -0,0 +1,24 @@ +namespace Compunet.YoloV8.Metadata; + +internal class SessionTensorInfo +{ + public TensorShape Input0 { get; } + + public TensorShape Output0 { get; } + + public TensorShape? 
Output1 { get; } + + public SessionTensorInfo(InferenceSession session) + { + var inputMetadata = session.InputMetadata.Values; + var outputMetadata = session.OutputMetadata.Values; + + Input0 = new TensorShape(inputMetadata.First().Dimensions); + Output0 = new TensorShape(outputMetadata.First().Dimensions); + + if (session.OutputMetadata.Count == 2) + { + Output1 = new TensorShape(outputMetadata.Last().Dimensions); + } + } +} \ No newline at end of file diff --git a/Source/YoloV8/Metadata/YoloArchitecture.cs b/Source/YoloV8/Metadata/YoloArchitecture.cs new file mode 100644 index 0000000..66633b4 --- /dev/null +++ b/Source/YoloV8/Metadata/YoloArchitecture.cs @@ -0,0 +1,7 @@ +namespace Compunet.YoloV8.Metadata; + +public enum YoloArchitecture +{ + YoloV8, + YoloV10, +} diff --git a/Source/YoloV8/Metadata/YoloMetadata.cs b/Source/YoloV8/Metadata/YoloMetadata.cs new file mode 100644 index 0000000..2693047 --- /dev/null +++ b/Source/YoloV8/Metadata/YoloMetadata.cs @@ -0,0 +1,105 @@ +namespace Compunet.YoloV8.Metadata; + +public class YoloMetadata +{ + public string Author { get; } + + public string Description { get; } + + public string Version { get; } + + public int BatchSize { get; } + + public Size ImageSize { get; } + + public YoloTask Task { get; } + + public YoloName[] Names { get; } + + public YoloArchitecture Architecture { get; } + + internal YoloMetadata(InferenceSession session) + { + var metadata = session.ModelMetadata.CustomMetadataMap; + + Author = metadata["author"]; + Description = metadata["description"]; + Version = metadata["version"]; + + Task = metadata["task"] switch + { + "obb" => YoloTask.Obb, + "pose" => YoloTask.Pose, + "detect" => YoloTask.Detect, + "segment" => YoloTask.Segment, + "classify" => YoloTask.Classify, + _ => throw new InvalidOperationException("Unknow YoloV8 'task' value") + }; + + if (Task == YoloTask.Detect && session.OutputMetadata.Values.First().Dimensions[2] == 6) // YOLOv10 output shape => [, 300, 6] + { + Architecture = 
YoloArchitecture.YoloV10; + } + + BatchSize = int.Parse(metadata["batch"]); + ImageSize = ParseSize(metadata["imgsz"]); + Names = ParseNames(metadata["names"]); + } + + public static YoloMetadata Parse(InferenceSession session) + { + try + { + if (session.ModelMetadata.CustomMetadataMap["task"] == "pose") + { + return new YoloPoseMetadata(session); + } + + return new YoloMetadata(session); + } + catch (Exception inner) + { + throw new InvalidOperationException("The metadata parsing failed, making sure you use an official YOLOv8 model", inner); + } + } + + #region Parsers + + private static Size ParseSize(string text) + { + text = text[1..^1]; // '[640, 640]' => '640, 640' + + var split = text.Split(", "); + + var y = int.Parse(split[0]); + var x = int.Parse(split[1]); + + return new Size(x, y); + } + + private static YoloName[] ParseNames(string text) + { + text = text[1..^1]; + + var split = text.Split(", "); + var count = split.Length; + + var names = new YoloName[count]; + + for (int i = 0; i < count; i++) + { + var value = split[i]; + + var valueSplit = value.Split(": "); + + var id = int.Parse(valueSplit[0]); + var name = valueSplit[1][1..^1].Replace('_', ' '); + + names[i] = new YoloName(id, name); + } + + return names; + } + + #endregion +} \ No newline at end of file diff --git a/Source/YoloV8/Metadata/YoloV8Class.cs b/Source/YoloV8/Metadata/YoloName.cs similarity index 81% rename from Source/YoloV8/Metadata/YoloV8Class.cs rename to Source/YoloV8/Metadata/YoloName.cs index 17c472b..42c297e 100644 --- a/Source/YoloV8/Metadata/YoloV8Class.cs +++ b/Source/YoloV8/Metadata/YoloName.cs @@ -1,6 +1,6 @@ namespace Compunet.YoloV8.Metadata; -public class YoloV8Class(int id, string name) +public class YoloName(int id, string name) { public int Id { get; } = id; diff --git a/Source/YoloV8/Metadata/YoloPoseMetadata.cs b/Source/YoloV8/Metadata/YoloPoseMetadata.cs new file mode 100644 index 0000000..4e2cb2a --- /dev/null +++ b/Source/YoloV8/Metadata/YoloPoseMetadata.cs @@ 
-0,0 +1,25 @@ +namespace Compunet.YoloV8.Metadata; + +public class YoloPoseMetadata : YoloMetadata +{ + public KeypointShape KeypointShape { get; } + + internal YoloPoseMetadata(InferenceSession session) : base(session) + { + var metadata = session.ModelMetadata.CustomMetadataMap; + + KeypointShape = ParseKeypointShape(metadata["kpt_shape"]); + } + + private static KeypointShape ParseKeypointShape(string text) + { + text = text[1..^1]; // '[17, 3]' => '17, 3' + + var split = text.Split(", "); + + var count = int.Parse(split[0]); + var channels = int.Parse(split[1]); + + return new KeypointShape(count, channels); + } +} \ No newline at end of file diff --git a/Source/YoloV8/Metadata/YoloV8Task.cs b/Source/YoloV8/Metadata/YoloTask.cs similarity index 81% rename from Source/YoloV8/Metadata/YoloV8Task.cs rename to Source/YoloV8/Metadata/YoloTask.cs index e97f0bb..9e9945b 100644 --- a/Source/YoloV8/Metadata/YoloV8Task.cs +++ b/Source/YoloV8/Metadata/YoloTask.cs @@ -1,6 +1,6 @@ namespace Compunet.YoloV8.Metadata; -public enum YoloV8Task +public enum YoloTask { Obb, Detect, diff --git a/Source/YoloV8/Metadata/YoloV8Metadata.cs b/Source/YoloV8/Metadata/YoloV8Metadata.cs deleted file mode 100644 index bd2efab..0000000 --- a/Source/YoloV8/Metadata/YoloV8Metadata.cs +++ /dev/null @@ -1,120 +0,0 @@ -namespace Compunet.YoloV8.Metadata; - -public class YoloV8Metadata(string author, - string description, - string version, - YoloV8Task task, - int batch, - Size imageSize, - IReadOnlyList names) -{ - public static YoloV8Metadata Parse(IDictionary metadata) - { - var author = metadata["author"]; - var description = metadata["description"]; - var version = metadata["version"]; - - var task = metadata["task"] switch - { - "obb" => YoloV8Task.Obb, - "pose" => YoloV8Task.Pose, - "detect" => YoloV8Task.Detect, - "segment" => YoloV8Task.Segment, - "classify" => YoloV8Task.Classify, - _ => throw new InvalidOperationException("Unknow YoloV8 'task' value") - }; - - var batch = 
int.Parse(metadata["batch"]); - - var imageSize = ParseSize(metadata["imgsz"]); - var classes = ParseNames(metadata["names"]); - - if (task is YoloV8Task.Pose) - { - var keypointShape = ParseKeypointShape(metadata["kpt_shape"]); - - return new YoloV8PoseMetadata(author, - description, - version, - task, - batch, - imageSize, - classes, - keypointShape); - } - - return new YoloV8Metadata(author, - description, - version, - task, - batch, - imageSize, - classes); - } - - public string Author { get; } = author; - - public string Description { get; } = description; - - public string Version { get; } = version; - - public YoloV8Task Task { get; } = task; - - public int Batch { get; } = batch; - - public Size ImageSize { get; } = imageSize; - - public IReadOnlyList Names { get; } = names; - - #region Static Parsers - - private static Size ParseSize(string text) - { - text = text[1..^1]; // '[640, 641]' => '640, 640' - - var split = text.Split(", "); - - var y = int.Parse(split[0]); - var x = int.Parse(split[1]); - - return new Size(x, y); - } - - private static KeypointShape ParseKeypointShape(string text) - { - text = text[1..^1]; // '[17, 3]' => '17, 3' - - var split = text.Split(", "); - - var count = int.Parse(split[0]); - var channels = int.Parse(split[1]); - - return new KeypointShape(count, channels); - } - - private static YoloV8Class[] ParseNames(string text) - { - text = text[1..^1]; - - var split = text.Split(", "); - var count = split.Length; - - var names = new YoloV8Class[count]; - - for (int i = 0; i < count; i++) - { - var value = split[i]; - - var valueSplit = value.Split(": "); - - var id = int.Parse(valueSplit[0]); - var name = valueSplit[1][1..^1].Replace('_', ' '); - - names[i] = new YoloV8Class(id, name); - } - - return names; - } - - #endregion -} \ No newline at end of file diff --git a/Source/YoloV8/Metadata/YoloV8PoseMetadata.cs b/Source/YoloV8/Metadata/YoloV8PoseMetadata.cs deleted file mode 100644 index 4306087..0000000 --- 
a/Source/YoloV8/Metadata/YoloV8PoseMetadata.cs +++ /dev/null @@ -1,25 +0,0 @@ -namespace Compunet.YoloV8.Metadata; - -public class YoloV8PoseMetadata : YoloV8Metadata -{ - public KeypointShape KeypointShape { get; } - - public YoloV8PoseMetadata(string author, - string description, - string version, - YoloV8Task task, - int batch, - Size imageSize, - IReadOnlyList classes, - KeypointShape keypointShape) - : base(author, - description, - version, - task, - batch, - imageSize, - classes) - { - KeypointShape = keypointShape; - } -} \ No newline at end of file diff --git a/Source/YoloV8/Parsers/DetectionOutputParser.cs b/Source/YoloV8/Parsers/DetectionOutputParser.cs deleted file mode 100644 index 736aa4d..0000000 --- a/Source/YoloV8/Parsers/DetectionOutputParser.cs +++ /dev/null @@ -1,25 +0,0 @@ -namespace Compunet.YoloV8.Parsers; - -internal readonly ref struct DetectionOutputParser(YoloV8Metadata metadata, YoloV8Configuration configuration) -{ - public BoundingBox[] Parse(Tensor output, Size originSize) - { - var boxes = new IndexedBoundingBoxParser(metadata, configuration).Parse(output, originSize); - - var result = new BoundingBox[boxes.Length]; - - for (int i = 0; i < boxes.Length; i++) - { - var box = boxes[i]; - - result[i] = new BoundingBox - { - Class = box.Class, - Bounds = box.Bounds, - Confidence = box.Confidence, - }; - } - - return result; - } -} \ No newline at end of file diff --git a/Source/YoloV8/Parsers/IndexedBoundingBox.cs b/Source/YoloV8/Parsers/IndexedBoundingBox.cs deleted file mode 100644 index 79eb22f..0000000 --- a/Source/YoloV8/Parsers/IndexedBoundingBox.cs +++ /dev/null @@ -1,16 +0,0 @@ -namespace Compunet.YoloV8.Parsers; - -internal readonly struct IndexedBoundingBox : IComparable -{ - public bool IsEmpty => Bounds.IsEmpty; - - public required int Index { get; init; } - - public required YoloV8Class Class { get; init; } - - public required Rectangle Bounds { get; init; } - - public required float Confidence { get; init; } - - public int 
CompareTo(IndexedBoundingBox other) => Confidence.CompareTo(other.Confidence); -} \ No newline at end of file diff --git a/Source/YoloV8/Parsers/IndexedBoundingBoxParser.cs b/Source/YoloV8/Parsers/IndexedBoundingBoxParser.cs deleted file mode 100644 index b42bbdc..0000000 --- a/Source/YoloV8/Parsers/IndexedBoundingBoxParser.cs +++ /dev/null @@ -1,136 +0,0 @@ -namespace Compunet.YoloV8.Parsers; - -internal readonly ref struct IndexedBoundingBoxParser(YoloV8Metadata metadata, YoloV8Configuration configuration) -{ - private readonly ArrayPool _boxesArrayPool = ArrayPool.Create(); - - public IndexedBoundingBox[] Parse(Tensor output, Size originSize) - { - int xPadding; - int yPadding; - - if (configuration.KeepOriginalAspectRatio) - { - var reductionRatio = Math.Min(metadata.ImageSize.Width / (float)originSize.Width, - metadata.ImageSize.Height / (float)originSize.Height); - - xPadding = (int)((metadata.ImageSize.Width - originSize.Width * reductionRatio) / 2); - yPadding = (int)((metadata.ImageSize.Height - originSize.Height * reductionRatio) / 2); - } - else - { - xPadding = 0; - yPadding = 0; - } - - return Parse(output, originSize, xPadding, yPadding); - } - - public IndexedBoundingBox[] Parse(Tensor output, Size originSize, int xPadding, int yPadding) - { - var xRatio = (float)originSize.Width / metadata.ImageSize.Width; - var yRatio = (float)originSize.Height / metadata.ImageSize.Height; - - if (configuration.KeepOriginalAspectRatio) - { - var maxRatio = Math.Max(xRatio, yRatio); - - xRatio = maxRatio; - yRatio = maxRatio; - } - - return Parse(output, originSize, xPadding, yPadding, xRatio, yRatio); - } - - public IndexedBoundingBox[] Parse(Tensor output, Size originSize, int xPadding, int yPadding, float xRatio, float yRatio) - { - var _metadata = metadata; - var _configuration = configuration; - - var boxesCount = output.Dimensions[2]; - var boxes = _boxesArrayPool.Rent(boxesCount); - - try - { - Parallel.For(0, boxesCount, i => - { - for (int j = 0; j < 
_metadata.Names.Count; j++) - { - var confidence = output[0, j + 4, i]; - - if (confidence <= _configuration.Confidence) - { - continue; - } - - var x = output[0, 0, i]; - var y = output[0, 1, i]; - var w = output[0, 2, i]; - var h = output[0, 3, i]; - - var xMin = (int)((x - w / 2 - xPadding) * xRatio); - var yMin = (int)((y - h / 2 - yPadding) * yRatio); - var xMax = (int)((x + w / 2 - xPadding) * xRatio); - var yMax = (int)((y + h / 2 - yPadding) * yRatio); - - xMin = Math.Clamp(xMin, 0, originSize.Width); - yMin = Math.Clamp(yMin, 0, originSize.Height); - xMax = Math.Clamp(xMax, 0, originSize.Width); - yMax = Math.Clamp(yMax, 0, originSize.Height); - - var name = _metadata.Names[j]; - var bounds = Rectangle.FromLTRB(xMin, yMin, xMax, yMax); - - if (bounds.Width == 0 || bounds.Height == 0) - { - continue; - } - - boxes[i] = new IndexedBoundingBox - { - Index = i, - Class = name, - Bounds = bounds, - Confidence = confidence - }; - } - }); - - return NonMaxSuppressionHelper.Suppress(GetActiveBoxes(boxes, boxesCount), configuration.IoU); - } - finally - { - _boxesArrayPool.Return(boxes, true); - } - } - - private static IndexedBoundingBox[] GetActiveBoxes(IndexedBoundingBox[] boxes, int boxesCount) - { - var activeCount = 0; - - for (var i = 0; i < boxesCount; i++) - { - if (boxes[i].IsEmpty == false) - { - activeCount++; - } - } - - var activeIndex = 0; - var activeBoxes = new IndexedBoundingBox[activeCount]; - - for (var i = 0; i < boxesCount; i++) - { - var box = boxes[i]; - - if (box.IsEmpty) - { - continue; - } - - activeBoxes[activeIndex++] = box; - } - - return activeBoxes; - } -} \ No newline at end of file diff --git a/Source/YoloV8/Parsers/ObbDetectionOutputParser.cs b/Source/YoloV8/Parsers/ObbDetectionOutputParser.cs deleted file mode 100644 index 13b4b6f..0000000 --- a/Source/YoloV8/Parsers/ObbDetectionOutputParser.cs +++ /dev/null @@ -1,26 +0,0 @@ -namespace Compunet.YoloV8; - -internal readonly ref struct ObbDetectionOutputParser(YoloV8Metadata 
metadata, YoloV8Configuration configuration) -{ - public ObbBoundingBox[] Parse(Tensor output, Size originSize) - { - var boxes = new ObbIndexedBoundingBoxParser(metadata, configuration).Parse(output, originSize); - - var result = new ObbBoundingBox[boxes.Length]; - - for (int i = 0; i < boxes.Length; i++) - { - var box = boxes[i]; - - result[i] = new ObbBoundingBox - { - Class = box.Class, - Bounds = box.Bounds, - Angle = box.Angle, - Confidence = box.Confidence, - }; - } - - return result; - } -} \ No newline at end of file diff --git a/Source/YoloV8/Parsers/ObbIndexedBoundingBox.cs b/Source/YoloV8/Parsers/ObbIndexedBoundingBox.cs deleted file mode 100644 index 679c293..0000000 --- a/Source/YoloV8/Parsers/ObbIndexedBoundingBox.cs +++ /dev/null @@ -1,18 +0,0 @@ -namespace Compunet.YoloV8.Parsers; - -internal readonly struct ObbIndexedBoundingBox : IComparable -{ - public bool IsEmpty => Bounds == default; - - public required int Index { get; init; } - - public required YoloV8Class Class { get; init; } - - public required Rectangle Bounds { get; init; } - - public required float Angle { get; init; } - - public required float Confidence { get; init; } - - public int CompareTo(ObbIndexedBoundingBox other) => Confidence.CompareTo(other.Confidence); -} \ No newline at end of file diff --git a/Source/YoloV8/Parsers/ObbIndexedBoundingBoxParser.cs b/Source/YoloV8/Parsers/ObbIndexedBoundingBoxParser.cs deleted file mode 100644 index a62e9b6..0000000 --- a/Source/YoloV8/Parsers/ObbIndexedBoundingBoxParser.cs +++ /dev/null @@ -1,143 +0,0 @@ -// https://github.com/ultralytics/ultralytics/issues/7667 - -namespace Compunet.YoloV8; - -internal readonly ref struct ObbIndexedBoundingBoxParser(YoloV8Metadata metadata, YoloV8Configuration configuration) -{ - private static readonly ArrayPool _boxesArrayPool = ArrayPool.Create(); - - public ObbIndexedBoundingBox[] Parse(Tensor output, Size originSize) - { - int xPadding; - int yPadding; - - if (configuration.KeepOriginalAspectRatio) 
- { - var reductionRatio = Math.Min(metadata.ImageSize.Width / (float)originSize.Width, - metadata.ImageSize.Height / (float)originSize.Height); - - xPadding = (int)((metadata.ImageSize.Width - originSize.Width * reductionRatio) / 2); - yPadding = (int)((metadata.ImageSize.Height - originSize.Height * reductionRatio) / 2); - } - else - { - xPadding = 0; - yPadding = 0; - } - - return Parse(output, originSize, xPadding, yPadding); - } - - public ObbIndexedBoundingBox[] Parse(Tensor output, Size originSize, int xPadding, int yPadding) - { - var xRatio = (float)originSize.Width / metadata.ImageSize.Width; - var yRatio = (float)originSize.Height / metadata.ImageSize.Height; - - if (configuration.KeepOriginalAspectRatio) - { - var maxRatio = Math.Max(xRatio, yRatio); - - xRatio = maxRatio; - yRatio = maxRatio; - } - - return Parse(output, xPadding, yPadding, xRatio, yRatio); - } - - public ObbIndexedBoundingBox[] Parse(Tensor output, int xPadding, int yPadding, float xRatio, float yRatio) - { - var _metadata = metadata; - var _parameters = configuration; - - var detectionDataSize = output.Dimensions[1]; - var boxesCount = output.Dimensions[2]; - var boxes = _boxesArrayPool.Rent(boxesCount); - - try - { - Parallel.For(0, boxesCount, i => - { - var maxConfidence = _parameters.Confidence; - var maxConfidenceIndex = -1; - - for (int j = 0; j < _metadata.Names.Count; j++) - { - var confidence = output[0, j + 4, i]; - - if (confidence > maxConfidence) - { - maxConfidence = confidence; - maxConfidenceIndex = j; - } - } - - if (maxConfidenceIndex == -1) - { - return; - } - - var x = (int)((output[0, 0, i] - xPadding) * xRatio); - var y = (int)((output[0, 1, i] - yPadding) * yRatio); - var w = (int)(output[0, 2, i] * xRatio); - var h = (int)(output[0, 3, i] * yRatio); - - var bounds = new Rectangle(x, y, w, h); - - var angle = (output[0, detectionDataSize - 1, i]); // Radians - - // Angle in [-pi/4,3/4 pi) --》 [-pi/2,pi/2) - if (angle >= MathF.PI && angle <= 0.75 * MathF.PI) - { 
- angle -= MathF.PI; - } - - var name = _metadata.Names[maxConfidenceIndex]; - - boxes[i] = new ObbIndexedBoundingBox - { - Index = i, - Class = name, - Bounds = bounds, - Angle = angle * 180 / MathF.PI, // Degrees - Confidence = maxConfidence - }; - }); - - return ObbNonMaxSuppressionHelper.Suppress(GetActiveBoxes(boxes, boxesCount), configuration.IoU); - } - finally - { - _boxesArrayPool.Return(boxes, true); - } - } - - private static ObbIndexedBoundingBox[] GetActiveBoxes(ObbIndexedBoundingBox[] boxes, int boxesCount) - { - var activeCount = 0; - - for (var i = 0; i < boxesCount; i++) - { - if (boxes[i].IsEmpty == false) - { - activeCount++; - } - } - - var activeIndex = 0; - var activeBoxes = new ObbIndexedBoundingBox[activeCount]; - - for (var i = 0; i < boxesCount; i++) - { - var box = boxes[i]; - - if (box.IsEmpty) - { - continue; - } - - activeBoxes[activeIndex++] = box; - } - - return activeBoxes; - } -} \ No newline at end of file diff --git a/Source/YoloV8/Parsers/PoseOutputParser.cs b/Source/YoloV8/Parsers/PoseOutputParser.cs deleted file mode 100644 index 20baeb9..0000000 --- a/Source/YoloV8/Parsers/PoseOutputParser.cs +++ /dev/null @@ -1,78 +0,0 @@ -namespace Compunet.YoloV8.Parsers; - -internal readonly ref struct PoseOutputParser(YoloV8Metadata metadata, YoloV8Configuration configuration) -{ - public PoseBoundingBox[] Parse(Tensor output, Size originSize) - { - var poseMetadata = (YoloV8PoseMetadata)metadata; - - int xPadding; - int yPadding; - - var xRatio = (float)originSize.Width / metadata.ImageSize.Width; - var yRatio = (float)originSize.Height / metadata.ImageSize.Height; - - if (configuration.KeepOriginalAspectRatio) - { - var reductionRatio = Math.Min(metadata.ImageSize.Width / (float)originSize.Width, metadata.ImageSize.Height / (float)originSize.Height); - - xPadding = (int)((metadata.ImageSize.Width - originSize.Width * reductionRatio) / 2); - yPadding = (int)((metadata.ImageSize.Height - originSize.Height * reductionRatio) / 2); - - var 
maxRatio = Math.Max(xRatio, yRatio); - - xRatio = maxRatio; - yRatio = maxRatio; - } - else - { - xPadding = 0; - yPadding = 0; - } - - var boxes = new IndexedBoundingBoxParser(metadata, configuration).Parse(output, originSize, xPadding, yPadding, xRatio, yRatio); - - var shape = poseMetadata.KeypointShape; - - var result = new PoseBoundingBox[boxes.Length]; - - for (int index = 0; index < boxes.Length; index++) - { - var box = boxes[index]; - - var keypoints = new Keypoint[shape.Count]; - - for (int i = 0; i < shape.Count; i++) - { - var offset = i * shape.Channels + 4 + poseMetadata.Names.Count; - - var pointX = (int)((output[0, offset + 0, box.Index] - xPadding) * xRatio); - var pointY = (int)((output[0, offset + 1, box.Index] - yPadding) * yRatio); - - var pointConfidence = poseMetadata.KeypointShape.Channels switch - { - 2 => 1F, - 3 => output[0, offset + 2, box.Index], - _ => throw new NotSupportedException("Unexpected keypoint shape") - }; - - keypoints[i] = new Keypoint - { - Index = i, - Point = new Point(pointX, pointY), - Confidence = pointConfidence - }; - } - - result[index] = new PoseBoundingBox - { - Class = box.Class, - Bounds = box.Bounds, - Confidence = box.Confidence, - Keypoints = keypoints - }; - } - - return result; - } -} \ No newline at end of file diff --git a/Source/YoloV8/Parsers/SegmentationOutputParser.cs b/Source/YoloV8/Parsers/SegmentationOutputParser.cs deleted file mode 100644 index 2076f82..0000000 --- a/Source/YoloV8/Parsers/SegmentationOutputParser.cs +++ /dev/null @@ -1,156 +0,0 @@ -namespace Compunet.YoloV8.Parsers; - -internal readonly ref struct SegmentationOutputParser(YoloV8Metadata metadata, YoloV8Configuration configuration) -{ - public SegmentationBoundingBox[] Parse(Tensor boxesOutput, Tensor maskPrototypes, Size originSize) - { - var _metadata = metadata; - - int xPadding; - int yPadding; - - if (configuration.KeepOriginalAspectRatio) - { - var reductionRatio = Math.Min(metadata.ImageSize.Width / 
(float)originSize.Width, metadata.ImageSize.Height / (float)originSize.Height); - - xPadding = (int)((metadata.ImageSize.Width - originSize.Width * reductionRatio) / 2); - yPadding = (int)((metadata.ImageSize.Height - originSize.Height * reductionRatio) / 2); - } - else - { - xPadding = 0; - yPadding = 0; - } - - var maskChannelCount = boxesOutput.Dimensions[1] - 4 - metadata.Names.Count; - - var boxes = new IndexedBoundingBoxParser(_metadata, configuration).Parse(boxesOutput, originSize, xPadding, yPadding); - - var result = new SegmentationBoundingBox[boxes.Length]; - - for (int index = 0; index < boxes.Length; index++) - { - var box = boxes[index]; - - var maskWeights = ExtractMaskWeights(boxesOutput, box.Index, maskChannelCount, _metadata.Names.Count + 4); - - var mask = ProcessMask(maskPrototypes, maskWeights, box.Bounds, originSize, _metadata.ImageSize, xPadding, yPadding); - - result[index] = new SegmentationBoundingBox - { - Mask = mask, - Class = box.Class, - Bounds = box.Bounds, - Confidence = box.Confidence, - }; - } - - return result; - } - - private static SegmentationMask ProcessMask(Tensor prototypes, - float[] weights, - Rectangle bounds, - Size originSize, - Size modelSize, - int xPadding, - int yPadding) - { - var maskChannels = prototypes.Dimensions[1]; - var maskHeight = prototypes.Dimensions[2]; - var maskWidth = prototypes.Dimensions[3]; - - if (maskChannels != weights.Length) - { - throw new InvalidOperationException(); - } - - using var bitmap = new Image(maskWidth, maskHeight); - - for (int y = 0; y < maskHeight; y++) - { - for (int x = 0; x < maskWidth; x++) - { - var value = 0F; - - for (int i = 0; i < maskChannels; i++) - value += prototypes[0, i, y, x] * weights[i]; - - value = Sigmoid(value); - - var color = GetLuminance(value); - var pixel = new L8(color); - - bitmap[x, y] = pixel; - } - } - - var xPad = xPadding * maskWidth / modelSize.Width; - var yPad = yPadding * maskHeight / modelSize.Height; - - var paddingCropRectangle = new 
Rectangle(xPad, - yPad, - maskWidth - xPad * 2, - maskHeight - yPad * 2); - - bitmap.Mutate(x => - { - // Crop for preprocess resize padding - x.Crop(paddingCropRectangle); - - // Resize to original image size - x.Resize(originSize); - - // Crop for getting the object segmentation only - x.Crop(bounds); - }); - - var final = new float[bounds.Width, bounds.Height]; - - bitmap.EnumeratePixels((point, pixel) => - { - var confidence = GetConfidence(pixel.PackedValue); - final[point.X, point.Y] = confidence; - }); - - return new SegmentationMask - { - Mask = final - }; - } - - private static float[] ExtractMaskWeights(Tensor output, int boxIndex, int maskChannelCount, int maskWeightsOffset) - { - var maskWeights = new float[maskChannelCount]; - - for (int i = 0; i < maskChannelCount; i++) - { - maskWeights[i] = output[0, maskWeightsOffset + i, boxIndex]; - } - - return maskWeights; - } - - #region Helpers - - private static float Sigmoid(float value) - { - //return 1 / (1 + MathF.Exp(-value)); - - var k = MathF.Exp(value); - - return k / (1.0f + k); - } - - private static byte GetLuminance(float confidence) - { - return (byte)((confidence * 255 - 255) * -1); - } - - private static float GetConfidence(byte luminance) - { - return (luminance - 255) * -1 / 255F; - } - - #endregion -} \ No newline at end of file diff --git a/Source/YoloV8/Parsing/IRawBoundingBox.cs b/Source/YoloV8/Parsing/IRawBoundingBox.cs new file mode 100644 index 0000000..24f1e2c --- /dev/null +++ b/Source/YoloV8/Parsing/IRawBoundingBox.cs @@ -0,0 +1,18 @@ +namespace Compunet.YoloV8.Parsing; + +internal interface IRawBoundingBox : IComparable +{ + public static abstract TSelf Empty { get; } + + public bool IsEmpty => Bounds.IsEmpty; + + public YoloName Name { get; } + + public Rectangle Bounds { get; } + + public float Confidence { get; } + + public static abstract float CalculateIoU(ref TSelf box1, ref TSelf box2); + + public static abstract TSelf Parse(ref RawParsingContext context, int index, 
YoloName name, float confidence, YoloArchitecture architecture); +} \ No newline at end of file diff --git a/Source/YoloV8/Parsing/RawBoundingBox.cs b/Source/YoloV8/Parsing/RawBoundingBox.cs new file mode 100644 index 0000000..4bb761a --- /dev/null +++ b/Source/YoloV8/Parsing/RawBoundingBox.cs @@ -0,0 +1,92 @@ +namespace Compunet.YoloV8.Parsing; + +internal readonly struct RawBoundingBox : IRawBoundingBox +{ + public static RawBoundingBox Empty { get; } = default; + + public required int Index { get; init; } + + public required YoloName Name { get; init; } + + public required Rectangle Bounds { get; init; } + + public required float Confidence { get; init; } + + public static float CalculateIoU(ref RawBoundingBox box1, ref RawBoundingBox box2) + { + var rect1 = box1.Bounds; + var rect2 = box2.Bounds; + + var area1 = rect1.Width * rect1.Height; + + if (area1 <= 0f) + { + return 0f; + } + + var area2 = rect2.Width * rect2.Height; + + if (area2 <= 0f) + { + return 0f; + } + + var intersection = Rectangle.Intersect(rect1, rect2); + var intersectionArea = intersection.Width * intersection.Height; + + return (float)intersectionArea / (area1 + area2 - intersectionArea); + } + + public static RawBoundingBox Parse(ref RawParsingContext context, int index, YoloName name, float confidence, YoloArchitecture architecture) + { + var tensor = context.Tensor; + var tensorSpan = tensor.Buffer.Span; + var stride1 = context.Stride1; + var padding = context.Padding; + var ratio = context.Ratio; + + int xMin; + int yMin; + int xMax; + int yMax; + + if (architecture == YoloArchitecture.YoloV10) + { + var boxOffset = index * stride1; + + var x = tensorSpan[boxOffset + 0]; + var y = tensorSpan[boxOffset + 1]; + var w = tensorSpan[boxOffset + 2]; + var h = tensorSpan[boxOffset + 3]; + + xMin = (int)((x - padding.X) * ratio.X); + yMin = (int)((y - padding.Y) * ratio.Y); + xMax = (int)((w - padding.X) * ratio.X); + yMax = (int)((h - padding.Y) * ratio.X); + } + else // YOLOv8 + { + var x = 
tensorSpan[0 + index]; + var y = tensorSpan[1 * stride1 + index]; + var w = tensorSpan[2 * stride1 + index]; + var h = tensorSpan[3 * stride1 + index]; + + xMin = (int)((x - w / 2 - padding.X) * ratio.X); + yMin = (int)((y - h / 2 - padding.Y) * ratio.Y); + xMax = (int)((x + w / 2 - padding.X) * ratio.X); + yMax = (int)((y + h / 2 - padding.Y) * ratio.Y); + } + + var bounds = Rectangle.FromLTRB(xMin, yMin, xMax, yMax); + + return new RawBoundingBox + { + Index = index, + Bounds = bounds, + Name = name, + Confidence = confidence, + }; + } + + public int CompareTo(RawBoundingBox other) => Confidence.CompareTo(other.Confidence); +} \ No newline at end of file diff --git a/Source/YoloV8/Parsing/RawObbBoundingBox.cs b/Source/YoloV8/Parsing/RawObbBoundingBox.cs new file mode 100644 index 0000000..78e8253 --- /dev/null +++ b/Source/YoloV8/Parsing/RawObbBoundingBox.cs @@ -0,0 +1,102 @@ +namespace Compunet.YoloV8.Parsing; + +internal readonly struct RawObbBoundingBox : IRawBoundingBox +{ + public static RawObbBoundingBox Empty { get; } = default; + + public required int Index { get; init; } + + public required YoloName Name { get; init; } + + public required Rectangle Bounds { get; init; } + + public required float Angle { get; init; } + + public required float Confidence { get; init; } + + public int CompareTo(RawObbBoundingBox other) => Confidence.CompareTo(other.Confidence); + + public static float CalculateIoU(ref RawObbBoundingBox box1, ref RawObbBoundingBox box2) + { + var rect1 = box1.Bounds; + var rect2 = box2.Bounds; + + var area1 = rect1.Width * rect1.Height; + + if (area1 <= 0f) + { + return 0f; + } + + var area2 = rect2.Width * rect2.Height; + + if (area2 <= 0f) + { + return 0f; + } + + var vertices1 = box1.GetCornerPoints(); + var vertices2 = box2.GetCornerPoints(); + + var path1 = new Path64(vertices1.Select(v => new Point64(v.X, v.Y))); + var path2 = new Path64(vertices2.Select(v => new Point64(v.X, v.Y))); + + var subject = new Paths64([path1]); + var clip = 
new Paths64([path2]); + + var intersection = Clipper.Intersect(subject, clip, FillRule.EvenOdd); + var union = Clipper.Union(subject, clip, FillRule.EvenOdd); + + if (intersection.Count == 0 || union.Count == 0) + { + return 0f; + } + + var intersectionArea = Clipper.Area(intersection[0]); + var unionArea = Clipper.Area(union[0]); + + return (float)(intersectionArea / unionArea); + } + + public static RawObbBoundingBox Parse(ref RawParsingContext context, int index, YoloName name, float confidence, YoloArchitecture architecture) + { + var tensorSpan = context.Tensor.Buffer.Span; + var stride1 = context.Stride1; + var padding = context.Padding; + var ratio = context.Ratio; + var nameCount = context.NameCount; + + if (nameCount == 0) + { + throw new ArgumentException(nameof(nameCount)); + } + + var x = (tensorSpan[index] - padding.X) * ratio.X; + var y = (tensorSpan[1 * stride1 + index] - padding.Y) * ratio.X; + var w = tensorSpan[2 * stride1 + index] * ratio.X; + var h = tensorSpan[3 * stride1 + index] * ratio.Y; + + // Radians + var angle = tensorSpan[(4 + nameCount) * stride1 + index]; + + // Angle in [-pi/4,3/4 pi) -> [-pi/2,pi/2) + if (angle >= MathF.PI && angle <= 0.75 * MathF.PI) + { + angle -= MathF.PI; + } + + // Degrees + angle *= 180f / MathF.PI; + + var bounds = new Rectangle((int)x, (int)y, (int)w, (int)h); + + return new RawObbBoundingBox + { + Index = index, + Name = name, + Angle = angle, + Bounds = bounds, + Confidence = confidence + }; + } +} \ No newline at end of file diff --git a/Source/YoloV8/Parsing/RawParsingContext.cs b/Source/YoloV8/Parsing/RawParsingContext.cs new file mode 100644 index 0000000..88c3d07 --- /dev/null +++ b/Source/YoloV8/Parsing/RawParsingContext.cs @@ -0,0 +1,14 @@ +namespace Compunet.YoloV8.Parsing; + +internal readonly ref struct RawParsingContext +{ + public required DenseTensor Tensor { get; init; } + + public required Vector Padding { get; init; } + + public required Vector Ratio { get; init; } + + public required int 
Stride1 { get; init; } + + public int NameCount { get; init; } +} \ No newline at end of file diff --git a/Source/YoloV8/Plotting/ColorPalette.cs b/Source/YoloV8/Plotting/ColorPalette.cs index 19bb020..2af6aa3 100644 --- a/Source/YoloV8/Plotting/ColorPalette.cs +++ b/Source/YoloV8/Plotting/ColorPalette.cs @@ -30,8 +30,8 @@ public Color GetColor(int index) private static ColorPalette CreateDefault() { - return new ColorPalette(new string[] - { + return new ColorPalette( + [ "FF3838", "FF9D97", "FF701F", @@ -52,6 +52,6 @@ private static ColorPalette CreateDefault() "CB38FF", "FF95C8", "FF37C7", - }); + ]); } } \ No newline at end of file diff --git a/Source/YoloV8/Plotting/ClassificationPlottingOptions.cs b/Source/YoloV8/Plotting/Options/ClassificationPlottingOptions.cs similarity index 100% rename from Source/YoloV8/Plotting/ClassificationPlottingOptions.cs rename to Source/YoloV8/Plotting/Options/ClassificationPlottingOptions.cs diff --git a/Source/YoloV8/Plotting/DetectionPlottingOptions.cs b/Source/YoloV8/Plotting/Options/DetectionPlottingOptions.cs similarity index 66% rename from Source/YoloV8/Plotting/DetectionPlottingOptions.cs rename to Source/YoloV8/Plotting/Options/DetectionPlottingOptions.cs index d61255c..4c6e6d3 100644 --- a/Source/YoloV8/Plotting/DetectionPlottingOptions.cs +++ b/Source/YoloV8/Plotting/Options/DetectionPlottingOptions.cs @@ -4,7 +4,9 @@ public class DetectionPlottingOptions : PlottingOptions { public static DetectionPlottingOptions Default { get; } = new DetectionPlottingOptions(); - public float TextHorizontalPadding { get; set; } + public float LabelTextXPadding { get; set; } + + public float LabelTextYPadding { get; set; } public float BoxBorderThickness { get; set; } @@ -12,8 +14,9 @@ public class DetectionPlottingOptions : PlottingOptions public DetectionPlottingOptions() { - TextHorizontalPadding = 5F; - BoxBorderThickness = 1F; + LabelTextXPadding = 6f; + LabelTextYPadding = 4f; + BoxBorderThickness = 1f; ColorPalette = 
ColorPalette.Default; } } \ No newline at end of file diff --git a/Source/YoloV8/Plotting/PlottingOptions.cs b/Source/YoloV8/Plotting/Options/PlottingOptions.cs similarity index 96% rename from Source/YoloV8/Plotting/PlottingOptions.cs rename to Source/YoloV8/Plotting/Options/PlottingOptions.cs index a8c93f7..52b176c 100644 --- a/Source/YoloV8/Plotting/PlottingOptions.cs +++ b/Source/YoloV8/Plotting/Options/PlottingOptions.cs @@ -9,7 +9,7 @@ public abstract class PlottingOptions public PlottingOptions() { FontFamily = GetDefaultFontFamily(); - FontSize = 12F; + FontSize = 12f; } private static FontFamily GetDefaultFontFamily() diff --git a/Source/YoloV8/Plotting/PosePlottingOptions.cs b/Source/YoloV8/Plotting/Options/PosePlottingOptions.cs similarity index 100% rename from Source/YoloV8/Plotting/PosePlottingOptions.cs rename to Source/YoloV8/Plotting/Options/PosePlottingOptions.cs diff --git a/Source/YoloV8/Plotting/SegmentationPlottingOptions.cs b/Source/YoloV8/Plotting/Options/SegmentationPlottingOptions.cs similarity index 100% rename from Source/YoloV8/Plotting/SegmentationPlottingOptions.cs rename to Source/YoloV8/Plotting/Options/SegmentationPlottingOptions.cs diff --git a/Source/YoloV8/Plotting/PlottingAsyncExtensions.cs b/Source/YoloV8/Plotting/PlottingAsyncExtensions.cs new file mode 100644 index 0000000..cad1ba2 --- /dev/null +++ b/Source/YoloV8/Plotting/PlottingAsyncExtensions.cs @@ -0,0 +1,29 @@ +namespace Compunet.YoloV8.Plotting; + +public static class PlottingAsyncExtensions +{ + public static async Task PlotImageAsync(this YoloResult result, Image originImage, PosePlottingOptions? options = null) + { + return await Task.Run(() => result.PlotImage(originImage, options)); + } + + public static async Task PlotImageAsync(this YoloResult result, Image originImage, DetectionPlottingOptions? 
options = null) + { + return await Task.Run(() => result.PlotImage(originImage, options)); + } + + public static async Task PlotImageAsync(this YoloResult result, Image originImage, DetectionPlottingOptions? options = null) + { + return await Task.Run(() => result.PlotImage(originImage, options)); + } + + public static async Task PlotImageAsync(this YoloResult result, Image originImage, SegmentationPlottingOptions? options = null) + { + return await Task.Run(() => result.PlotImage(originImage, options)); + } + + public static async Task PlotImageAsync(this YoloResult result, Image originImage, ClassificationPlottingOptions? options = null) + { + return await Task.Run(() => result.PlotImage(originImage, options)); + } +} \ No newline at end of file diff --git a/Source/YoloV8/Plotting/PlottingAsyncOperationExtensions.cs b/Source/YoloV8/Plotting/PlottingAsyncOperationExtensions.cs deleted file mode 100644 index 0adda81..0000000 --- a/Source/YoloV8/Plotting/PlottingAsyncOperationExtensions.cs +++ /dev/null @@ -1,30 +0,0 @@ -namespace Compunet.YoloV8.Plotting; - -public static class PlottingAsyncOperationExtensions -{ - public static async Task PlotImageAsync(this PoseResult result, Image originImage, PosePlottingOptions? options = null) - { - return await Task.Run(() => result.PlotImage(originImage, options)); - } - - public static async Task PlotImageAsync(this DetectionResult result, Image originImage, DetectionPlottingOptions? options = null) - { - return await Task.Run(() => result.PlotImage(originImage, options)); - } - - public static async Task PlotImageAsync(this ObbDetectionResult result, Image originImage, DetectionPlottingOptions? options = null) - { - return await Task.Run(() => result.PlotImage(originImage, options)); - } - - public static async Task PlotImageAsync(this SegmentationResult result, Image originImage, SegmentationPlottingOptions? 
options = null) - { - return await Task.Run(() => result.PlotImage(originImage, options)); - } - - - public static async Task PlotImageAsync(this ClassificationResult result, Image originImage, ClassificationPlottingOptions? options = null) - { - return await Task.Run(() => result.PlotImage(originImage, options)); - } -} \ No newline at end of file diff --git a/Source/YoloV8/Plotting/PlottingExtensions.cs b/Source/YoloV8/Plotting/PlottingExtensions.cs index 703f96e..2afa5e3 100644 --- a/Source/YoloV8/Plotting/PlottingExtensions.cs +++ b/Source/YoloV8/Plotting/PlottingExtensions.cs @@ -1,52 +1,51 @@ -namespace Compunet.YoloV8.Plotting; +using System.Numerics; + +namespace Compunet.YoloV8.Plotting; public static class PlottingExtensions { - public static Image PlotImage(this PoseResult result, ImageSelector originImage, PosePlottingOptions? options = null) + public static Image PlotImage(this YoloResult result, Image image, PosePlottingOptions? options = null) { options ??= PosePlottingOptions.Default; - var process = originImage.Load(true); - - EnsureSize(process.Size, result.Image); + var target = image.CloneAs(); - var size = result.Image; + target.AutoOrient(); - var ratio = Math.Max(size.Width, size.Height) / 640F; + ValidateSize(image.Size, result.ImageSize); + var ratio = GetRatio(image.Size); var textOptions = new TextOptions(options.FontFamily.CreateFont(options.FontSize * ratio)); - - var textPadding = options.TextHorizontalPadding * ratio; - + var textPadding = new Vector2(options.LabelTextXPadding, options.LabelTextYPadding) * ratio; var boxBorderThickness = options.BoxBorderThickness * ratio; - var radius = options.KeypointRadius * ratio; var lineThickness = options.KeypointLineThickness * ratio; - foreach (var box in result.Boxes) + foreach (var box in result) { - var label = $"{box.Class.Name} {box.Confidence:N}"; - var color = options.ColorPalette.GetColor(box.Class.Id); + var label = $"{box.Name.Name} {box.Confidence:N}"; + var color = 
options.ColorPalette.GetColor(box.Name.Id); var points = GetPoints(box); var textLocation = points[0]; - process.Mutate(context => + target.Mutate(context => { - DrawBoundingBox(context, points, color, boxBorderThickness, .1f); - - DrawTextLabel(context, label, textLocation, color, boxBorderThickness, textPadding, textOptions); + context.DrawBox(points, color, boxBorderThickness, .1f); + context.DrawLabel(label, textLocation, color, boxBorderThickness, textPadding, textOptions); // Draw lines - for (int i = 0; i < options.Skeleton.Connections.Length; i++) + for (var i = 0; i < options.Skeleton.Connections.Length; i++) { var connection = options.Skeleton.Connections[i]; - var first = box.Keypoints.ElementAt(connection.First); - var second = box.Keypoints.ElementAt(connection.Second); + var first = box[connection.First]; + var second = box[connection.Second]; if (first.Confidence < options.KeypointConfidence || second.Confidence < options.KeypointConfidence) + { continue; + } var points = new PointF[] { @@ -60,10 +59,12 @@ public static Image PlotImage(this PoseResult result, ImageSelector orig } // Draw keypoints - foreach (var keypoint in box.Keypoints) + foreach (var keypoint in box) { if (keypoint.Confidence < options.KeypointConfidence) + { continue; + } var ellipse = new EllipsePolygon(keypoint.Point, radius); @@ -74,103 +75,94 @@ public static Image PlotImage(this PoseResult result, ImageSelector orig }); } - return process; + return target; } - public static Image PlotImage(this DetectionResult result, ImageSelector originImage, DetectionPlottingOptions? options = null) + public static Image PlotImage(this YoloResult result, Image image, DetectionPlottingOptions? 
options = null) { options ??= DetectionPlottingOptions.Default; - var process = originImage.Load(true); + var target = image.CloneAs(); - process.Mutate(x => x.AutoOrient()); + target.AutoOrient(); - EnsureSize(process.Size, result.Image); + ValidateSize(image.Size, result.ImageSize); - var size = result.Image; + var ratio = GetRatio(image.Size); - var ratio = Math.Max(size.Width, size.Height) / 640F; - - var textOptions = new TextOptions(options.FontFamily.CreateFont(options.FontSize * ratio)); - - var textPadding = options.TextHorizontalPadding * ratio; + var textOptions = new TextOptions(options.FontFamily.CreateFont(options.FontSize * ratio)) + { + VerticalAlignment = VerticalAlignment.Center, + }; var thickness = options.BoxBorderThickness * ratio; + var textPadding = new Vector2(options.LabelTextXPadding, options.LabelTextYPadding) * ratio; - foreach (var box in result.Boxes) + foreach (var box in result) { - var label = $"{box.Class.Name} {box.Confidence:N}"; - var color = options.ColorPalette.GetColor(box.Class.Id); + var label = $"{box.Name.Name} {box.Confidence:N}"; + var color = options.ColorPalette.GetColor(box.Name.Id); var points = GetPoints(box); var textLocation = points[0]; // The first point is top left - process.Mutate(context => + target.Mutate(context => { - DrawBoundingBox(context, points, color, thickness, .1f); - - DrawTextLabel(context, label, textLocation, color, thickness, textPadding, textOptions); + context.DrawBox(points, color, thickness, .1f); + context.DrawLabel(label, textLocation, color, thickness, textPadding, textOptions); }); } - return process; + return target; } - public static Image PlotImage(this ObbDetectionResult result, ImageSelector originImage, DetectionPlottingOptions? options = null) + public static Image PlotImage(this YoloResult result, Image image, DetectionPlottingOptions? 
options = null) { options ??= DetectionPlottingOptions.Default; - var process = originImage.Load(true); - - process.Mutate(x => x.AutoOrient()); - - EnsureSize(process.Size, result.Image); + var target = image.CloneAs(); - var size = result.Image; + target.AutoOrient(); - var ratio = Math.Max(size.Width, size.Height) / 640f; + ValidateSize(target.Size, result.ImageSize); + var ratio = GetRatio(image.Size); var textOptions = new TextOptions(options.FontFamily.CreateFont(options.FontSize * ratio)); - - var textPadding = options.TextHorizontalPadding * ratio; - + var textPadding = new Vector2(options.LabelTextXPadding, options.LabelTextYPadding) * ratio; var thickness = options.BoxBorderThickness * ratio; - foreach (var box in result.Boxes) + foreach (var box in result) { - var label = $"{box.Class.Name} {box.Confidence:N}"; - var color = options.ColorPalette.GetColor(box.Class.Id); + var label = $"{box.Name.Name} {box.Confidence:N}"; + var color = options.ColorPalette.GetColor(box.Name.Id); var points = GetPoints(box); var textLocation = points.MinBy(p => p.Y); - process.Mutate(context => + target.Mutate(context => { - DrawBoundingBox(context, points, color, thickness, .1f); - - DrawTextLabel(context, label, textLocation, color, thickness, textPadding, textOptions); + context.DrawBox(points, color, thickness, .1f); + context.DrawLabel(label, textLocation, color, thickness, textPadding, textOptions); }); } - return process; + return target; } - public static Image PlotImage(this SegmentationResult result, ImageSelector originImage, SegmentationPlottingOptions? options = null) + public static Image PlotImage(this YoloResult result, Image image, SegmentationPlottingOptions? 
options = null) { options ??= SegmentationPlottingOptions.Default; - var process = originImage.Load(true); - - EnsureSize(process.Size, result.Image); + var target = image.CloneAs(); - var size = result.Image; + target.AutoOrient(); - var ratio = Math.Max(size.Width, size.Height) / 640F; + ValidateSize(target.Size, result.ImageSize); + var size = result.ImageSize; + var ratio = GetRatio(image.Size); var textOptions = new TextOptions(options.FontFamily.CreateFont(options.FontSize * ratio)); - - var textPadding = options.TextHorizontalPadding * ratio; - + var textPadding = new Vector2(options.LabelTextXPadding, options.LabelTextYPadding) * ratio; var thickness = options.BoxBorderThickness * ratio; #region Draw Masks @@ -178,77 +170,76 @@ public static Image PlotImage(this SegmentationResult result, ImageSelector(size.Width, size.Height); using var contoursLayer = new Image(size.Width, size.Height); - foreach (var box in result.Boxes) + foreach (var box in result) { - var color = options.ColorPalette.GetColor(box.Class.Id); + var color = options.ColorPalette.GetColor(box.Name.Id); using var mask = new Image(box.Bounds.Width, box.Bounds.Height); - for (int x = 0; x < box.Mask.Width; x++) + for (var x = 0; x < box.Mask.Width; x++) { - for (int y = 0; y < box.Mask.Height; y++) + for (var y = 0; y < box.Mask.Height; y++) { var value = box.Mask[x, y]; if (value > options.MaskConfidence) + { mask[x, y] = color; + } } } masksLayer.Mutate(x => x.DrawImage(mask, box.Bounds.Location, 1F)); - if (options.ContoursThickness > 0F) + if (options.ContoursThickness > 0f) { using var contours = CreateContours(mask, color, options.ContoursThickness * ratio); - contoursLayer.Mutate(x => x.DrawImage(contours, box.Bounds.Location, 1F)); + contoursLayer.Mutate(x => x.DrawImage(contours, box.Bounds.Location, 1f)); } } - process.Mutate(x => x.DrawImage(masksLayer, .4F)); - process.Mutate(x => x.DrawImage(contoursLayer, 1F)); + target.Mutate(x => x.DrawImage(masksLayer, .4F)); + 
target.Mutate(x => x.DrawImage(contoursLayer, 1F)); #endregion #region Draw Boxes - foreach (var box in result.Boxes) + foreach (var box in result) { - var label = $"{box.Class.Name} {box.Confidence:N}"; - var color = options.ColorPalette.GetColor(box.Class.Id); + var label = $"{box.Name.Name} {box.Confidence:N}"; + var color = options.ColorPalette.GetColor(box.Name.Id); var points = GetPoints(box); var textLocation = points[0]; - process.Mutate(context => + target.Mutate(context => { - DrawBoundingBox(context, points, color, thickness, .1f); - - DrawTextLabel(context, label, textLocation, color, thickness, textPadding, textOptions); + context.DrawBox(points, color, thickness, .1f); + context.DrawLabel(label, textLocation, color, thickness, textPadding, textOptions); }); } #endregion - return process; + return target; } - public static Image PlotImage(this ClassificationResult result, ImageSelector originImage, ClassificationPlottingOptions? options = null) + public static Image PlotImage(this YoloResult result, Image image, ClassificationPlottingOptions? 
options = null) { options ??= ClassificationPlottingOptions.Default; - var process = originImage.Load(true); + var target = image.CloneAs(); - EnsureSize(process.Size, result.Image); + target.AutoOrient(); - var size = result.Image; - - var ratio = Math.Max(size.Width, size.Height) / 640F; + ValidateSize(target.Size, result.ImageSize); + var ratio = GetRatio(image.Size); var textOptions = new TextOptions(options.FontFamily.CreateFont(options.FontSize * ratio)); var label = result.ToString(); - - var classId = result.TopClass.Name.Id; + var classId = result.GetTopClass().Name.Id; var fill = options.FillColorPalette.GetColor(classId); var border = options.BorderColorPalette.GetColor(classId); @@ -257,14 +248,14 @@ public static Image PlotImage(this ClassificationResult result, ImageSelector x.DrawText(label, textOptions.Font, brush, pen, location)); + target.Mutate(x => x.DrawText(label, textOptions.Font, brush, pen, location)); - return process; + return target; } #region Private Methods - private static void DrawBoundingBox(IImageProcessingContext context, PointF[] points, Color color, float thickness, float opacity) + private static void DrawBox(this IImageProcessingContext context, PointF[] points, Color color, float thickness, float opacity) { var polygon = new Polygon(points); @@ -276,16 +267,21 @@ private static void DrawBoundingBox(IImageProcessingContext context, PointF[] po } } - private static void DrawTextLabel(IImageProcessingContext context, string text, PointF location, Color color, float thickness, float padding, TextOptions options) + private static void DrawLabel(this IImageProcessingContext context, string text, PointF location, Color color, float thickness, Vector2 padding, TextOptions options) { - var rendered = TextMeasurer.MeasureSize(text, options); - var renderedSize = new Size((int)(rendered.Width + padding), (int)rendered.Height); + var xPadding = padding.X; + var yPadding = padding.Y; - location.Offset(0, -renderedSize.Height); + var 
rendered = TextMeasurer.MeasureBounds(text, options); + var labelSize = new SizeF(rendered.Width + xPadding, options.Font.Size + yPadding); - var textLocation = new PointF(location.X + padding / 2, location.Y); + location.Offset(0, -labelSize.Height); - var textBoxPolygon = new RectangularPolygon(location, renderedSize); + var textLocation = new PointF(location.X + xPadding / 2, location.Y + yPadding / 2); + var textBoxPolygon = new RectangularPolygon(location, labelSize); + + // Fix text position + textLocation.Offset(0, -(yPadding * .1f)); context.Fill(color, textBoxPolygon); context.Draw(color, thickness, textBoxPolygon); @@ -301,15 +297,12 @@ private static Image CreateContours(this Image source, Color color, floa foreach (var points in contours) { - if (points.Count < 2) + if (points.Length < 2) { continue; } - var pathBuilder = new PathBuilder(); - pathBuilder.AddLines(points.Select(x => (PointF)x)); - - var path = pathBuilder.Build(); + var path = new PathBuilder().AddLines(points.Select(point => (PointF)point)).Build(); result.Mutate(x => { @@ -320,7 +313,7 @@ private static Image CreateContours(this Image source, Color color, floa return result; } - private static PointF[] GetPoints(BoundingBox box) + private static PointF[] GetPoints(Detection box) { var rect = box.Bounds; @@ -333,14 +326,14 @@ private static PointF[] GetPoints(BoundingBox box) ]; } - private static PointF[] GetPoints(ObbBoundingBox box) + private static PointF[] GetPoints(ObbDetection box) { var points = box.GetCornerPoints(); return [.. 
points.Select(point => new PointF(point.X, point.Y))]; } - private static void EnsureSize(Size origin, Size result) + private static void ValidateSize(Size origin, Size result) { if (origin != result) { @@ -348,5 +341,10 @@ private static void EnsureSize(Size origin, Size result) } } + private static float GetRatio(Size size) + { + return Math.Max(size.Width, size.Height) / 640f; + } + #endregion } \ No newline at end of file diff --git a/Source/YoloV8/Base/YoloV8PlottingExtensions.cs b/Source/YoloV8/Plotting/PredictorPlottingExtensions.cs similarity index 57% rename from Source/YoloV8/Base/YoloV8PlottingExtensions.cs rename to Source/YoloV8/Plotting/PredictorPlottingExtensions.cs index fe6f2cf..6fe468b 100644 --- a/Source/YoloV8/Base/YoloV8PlottingExtensions.cs +++ b/Source/YoloV8/Plotting/PredictorPlottingExtensions.cs @@ -2,11 +2,11 @@ namespace Compunet.YoloV8; -public static class YoloV8PlottingExtensions +public static class PredictorPlottingExtensions { #region TaskAndSave Sync - public static PoseResult PoseAndSave(this YoloV8Predictor predictor, string path, string? output = null, YoloV8Configuration? configuration = null, PosePlottingOptions? options = null) + public static YoloResult PoseAndSave(this YoloPredictor predictor, string path, string? output = null, YoloConfiguration? configuration = null, PosePlottingOptions? options = null) { using var image = Image.Load(path); @@ -21,7 +21,7 @@ public static PoseResult PoseAndSave(this YoloV8Predictor predictor, string path return result; } - public static DetectionResult DetectAndSave(this YoloV8Predictor predictor, string path, string? output = null, YoloV8Configuration? configuration = null, DetectionPlottingOptions? options = null) + public static YoloResult DetectAndSave(this YoloPredictor predictor, string path, string? output = null, YoloConfiguration? configuration = null, DetectionPlottingOptions? 
options = null) { using var image = Image.Load(path); @@ -36,7 +36,7 @@ public static DetectionResult DetectAndSave(this YoloV8Predictor predictor, stri return result; } - public static ObbDetectionResult DetectObbAndSave(this YoloV8Predictor predictor, string path, string? output = null, YoloV8Configuration? configuration = null, DetectionPlottingOptions? options = null) + public static YoloResult DetectObbAndSave(this YoloPredictor predictor, string path, string? output = null, YoloConfiguration? configuration = null, DetectionPlottingOptions? options = null) { using var image = Image.Load(path); @@ -51,7 +51,7 @@ public static ObbDetectionResult DetectObbAndSave(this YoloV8Predictor predictor return result; } - public static SegmentationResult SegmentAndSave(this YoloV8Predictor predictor, string path, string? output = null, YoloV8Configuration? configuration = null, SegmentationPlottingOptions? options = null) + public static YoloResult SegmentAndSave(this YoloPredictor predictor, string path, string? output = null, YoloConfiguration? configuration = null, SegmentationPlottingOptions? options = null) { using var image = Image.Load(path); @@ -66,7 +66,7 @@ public static SegmentationResult SegmentAndSave(this YoloV8Predictor predictor, return result; } - public static ClassificationResult ClassifyAndSave(this YoloV8Predictor predictor, string path, string? output = null, YoloV8Configuration? configuration = null, ClassificationPlottingOptions? options = null) + public static YoloResult ClassifyAndSave(this YoloPredictor predictor, string path, string? output = null, YoloConfiguration? configuration = null, ClassificationPlottingOptions? options = null) { using var image = Image.Load(path); @@ -85,11 +85,11 @@ public static ClassificationResult ClassifyAndSave(this YoloV8Predictor predicto #region TaskAndSaveAsync - public static async Task PoseAndSaveAsync(this YoloV8Predictor predictor, - string path, - string? output = null, - YoloV8Configuration? 
configuration = null, - PosePlottingOptions? options = null) + public static async Task> PoseAndSaveAsync(this YoloPredictor predictor, + string path, + string? output = null, + YoloConfiguration? configuration = null, + PosePlottingOptions? options = null) { using var image = Image.Load(path); @@ -104,11 +104,11 @@ public static async Task PoseAndSaveAsync(this YoloV8Predictor predi return result; } - public static async Task DetectAndSaveAsync(this YoloV8Predictor predictor, - string path, - string? output = null, - YoloV8Configuration? configuration = null, - DetectionPlottingOptions? options = null) + public static async Task> DetectAndSaveAsync(this YoloPredictor predictor, + string path, + string? output = null, + YoloConfiguration? configuration = null, + DetectionPlottingOptions? options = null) { using var image = Image.Load(path); @@ -124,11 +124,11 @@ public static async Task DetectAndSaveAsync(this YoloV8Predicto } - public static async Task DetectObbAndSaveAsync(this YoloV8Predictor predictor, - string path, - string? output = null, - YoloV8Configuration? configuration = null, - DetectionPlottingOptions? options = null) + public static async Task> DetectObbAndSaveAsync(this YoloPredictor predictor, + string path, + string? output = null, + YoloConfiguration? configuration = null, + DetectionPlottingOptions? options = null) { using var image = Image.Load(path); @@ -143,11 +143,11 @@ public static async Task DetectObbAndSaveAsync(this YoloV8Pr return result; } - public static async Task SegmentAndSaveAsync(this YoloV8Predictor predictor, - string path, - string? output = null, - YoloV8Configuration? configuration = null, - SegmentationPlottingOptions? options = null) + public static async Task> SegmentAndSaveAsync(this YoloPredictor predictor, + string path, + string? output = null, + YoloConfiguration? configuration = null, + SegmentationPlottingOptions? 
options = null) { using var image = Image.Load(path); @@ -162,11 +162,11 @@ public static async Task SegmentAndSaveAsync(this YoloV8Pred return result; } - public static async Task ClassifyAndSaveAsync(this YoloV8Predictor predictor, - string path, - string? output = null, - YoloV8Configuration? configuration = null, - ClassificationPlottingOptions? options = null) + public static async Task> ClassifyAndSaveAsync(this YoloPredictor predictor, + string path, + string? output = null, + YoloConfiguration? configuration = null, + ClassificationPlottingOptions? options = null) { using var image = Image.Load(path); @@ -185,35 +185,35 @@ public static async Task ClassifyAndSaveAsync(this YoloV8P #region PredictAndSave - public static YoloV8Result PredictAndSave(this YoloV8Predictor predictor, string path, string? output = null, YoloV8Configuration? configuration = null, PlottingOptions? options = null) + public static YoloResult PredictAndSave(this YoloPredictor predictor, string path, string? output = null, YoloConfiguration? configuration = null, PlottingOptions? 
options = null) { return predictor.Metadata.Task switch { - YoloV8Task.Pose => PoseAndSave(predictor, path, output, configuration, options as PosePlottingOptions), - YoloV8Task.Detect => DetectAndSave(predictor, path, output, configuration, options as DetectionPlottingOptions), - YoloV8Task.Obb => DetectObbAndSave(predictor, path, output, configuration, options as DetectionPlottingOptions), - YoloV8Task.Segment => SegmentAndSave(predictor, path, output, configuration, options as SegmentationPlottingOptions), - YoloV8Task.Classify => ClassifyAndSave(predictor, path, output, configuration, options as ClassificationPlottingOptions), + YoloTask.Pose => PoseAndSave(predictor, path, output, configuration, options as PosePlottingOptions), + YoloTask.Detect => DetectAndSave(predictor, path, output, configuration, options as DetectionPlottingOptions), + YoloTask.Obb => DetectObbAndSave(predictor, path, output, configuration, options as DetectionPlottingOptions), + YoloTask.Segment => SegmentAndSave(predictor, path, output, configuration, options as SegmentationPlottingOptions), + YoloTask.Classify => ClassifyAndSave(predictor, path, output, configuration, options as ClassificationPlottingOptions), _ => throw new NotSupportedException("Unsupported YOLOv8 task") }; } - public static async Task PredictAndSaveAsync(this YoloV8Predictor predictor, string path, string? output = null, YoloV8Configuration? configuration = null, PlottingOptions? options = null) + public static async Task PredictAndSaveAsync(this YoloPredictor predictor, string path, string? output = null, YoloConfiguration? configuration = null, PlottingOptions? 
options = null) { return predictor.Metadata.Task switch { - YoloV8Task.Pose => await PoseAndSaveAsync(predictor, path, output, configuration, options as PosePlottingOptions), - YoloV8Task.Detect => await DetectAndSaveAsync(predictor, path, output, configuration, options as DetectionPlottingOptions), - YoloV8Task.Obb => await DetectObbAndSaveAsync(predictor, path, output, configuration, options as DetectionPlottingOptions), - YoloV8Task.Segment => await SegmentAndSaveAsync(predictor, path, output, configuration, options as SegmentationPlottingOptions), - YoloV8Task.Classify => await ClassifyAndSaveAsync(predictor, path, output, configuration, options as ClassificationPlottingOptions), + YoloTask.Pose => await PoseAndSaveAsync(predictor, path, output, configuration, options as PosePlottingOptions), + YoloTask.Detect => await DetectAndSaveAsync(predictor, path, output, configuration, options as DetectionPlottingOptions), + YoloTask.Obb => await DetectObbAndSaveAsync(predictor, path, output, configuration, options as DetectionPlottingOptions), + YoloTask.Segment => await SegmentAndSaveAsync(predictor, path, output, configuration, options as SegmentationPlottingOptions), + YoloTask.Classify => await ClassifyAndSaveAsync(predictor, path, output, configuration, options as ClassificationPlottingOptions), _ => throw new NotSupportedException("Unsupported YOLOv8 task") }; } #endregion - private static string CreateImageOutputPath(string path, YoloV8Task task) + private static string CreateImageOutputPath(string path, YoloTask task) { var baseDirectory = Path.GetDirectoryName(path) ?? Environment.CurrentDirectory; @@ -232,7 +232,6 @@ private static string CreateImageOutputPath(string path, YoloV8Task task) while (true) { var filename = index == 0 ? 
$"{name}{extn}" : $"{name}_{index}{extn}"; - var fullpath = Path.Combine(plotDirectory, filename); if (File.Exists(fullpath)) diff --git a/Source/YoloV8/Timing/SpeedTimer.cs b/Source/YoloV8/Predictor/PredictorTimer.cs similarity index 59% rename from Source/YoloV8/Timing/SpeedTimer.cs rename to Source/YoloV8/Predictor/PredictorTimer.cs index 55cdb99..7cc1d30 100644 --- a/Source/YoloV8/Timing/SpeedTimer.cs +++ b/Source/YoloV8/Predictor/PredictorTimer.cs @@ -1,6 +1,6 @@ -namespace Compunet.YoloV8.Timing; +namespace Compunet.YoloV8; -public class SpeedTimer +internal ref struct PredictorTimer() { private readonly Stopwatch _stopwatch = new(); @@ -8,13 +8,7 @@ public class SpeedTimer private TimeSpan _inference; private TimeSpan _postprocess; - public TimeSpan Preprocess => _preprocess; - - public TimeSpan Inference => _inference; - - public TimeSpan Postprocess => _postprocess; - - public void StartPreprocess() + public readonly void StartPreprocess() { _stopwatch.Restart(); } @@ -36,8 +30,6 @@ public SpeedResult Stop() _postprocess = _stopwatch.Elapsed; _stopwatch.Stop(); - return new SpeedResult(_preprocess, - _inference, - _postprocess); + return new SpeedResult(_preprocess, _inference, _postprocess); } } \ No newline at end of file diff --git a/Source/YoloV8/Predictor/ServiceResolver.cs b/Source/YoloV8/Predictor/ServiceResolver.cs new file mode 100644 index 0000000..8839b6a --- /dev/null +++ b/Source/YoloV8/Predictor/ServiceResolver.cs @@ -0,0 +1,122 @@ +namespace Compunet.YoloV8; + +internal class ServiceResolver : IDisposable +{ + private readonly YoloMetadata _metadata; + private readonly InferenceSession _session; + private readonly SessionTensorInfo _tensorInfo; + private readonly YoloConfiguration _configuration; + + private readonly ServiceProvider _provider; + private readonly Dictionary _providers = []; + + private bool _disposed; + + public ServiceResolver(InferenceSession session, YoloConfiguration configuration) + { + _session = session; + 
_configuration = configuration; + _metadata = YoloMetadata.Parse(session); + _tensorInfo = new SessionTensorInfo(session); + + // Create default services + var services = CreateDefaultServices(_metadata); + + // Add options + services.AddSingleton(_session); + services.AddSingleton(_tensorInfo); + services.AddSingleton(_configuration); + + // Build the service provider + _provider = services.BuildServiceProvider(); + } + + public T Resolve(YoloConfiguration? configuration = null) where T : notnull + { + ObjectDisposedException.ThrowIf(_disposed, this); + + if (configuration is null || _configuration.Equals(configuration)) + { + return _provider.GetRequiredService(); + } + + if (_providers.TryGetValue(configuration, out var p)) + { + return p.GetRequiredService(); + } + else + { + var services = CreateDefaultServices(_metadata); + + services.AddSingleton(_session); + services.AddSingleton(_tensorInfo); + services.AddSingleton(configuration); + + var provider = services.BuildServiceProvider(); + + _providers.Add(configuration, provider); + + return provider.GetRequiredService(); + } + } + + private static ServiceCollection CreateDefaultServices(YoloMetadata metadata) + { + var services = new ServiceCollection(); + + if (metadata is YoloPoseMetadata pose) + { + services.AddSingleton(pose); + } + + services + .AddSingleton(metadata) + .AddSingleton() + .AddSingleton() + .AddSingleton() + .AddSingleton() + .AddSingleton(); + + switch (metadata.Task) + { + case YoloTask.Pose: + services.AddSingleton, PoseParser>(); + break; + + case YoloTask.Detect: + services.AddSingleton, DetectionParser>(); + break; + + case YoloTask.Obb: + services.AddSingleton, ObbDetectionParser>(); + break; + + case YoloTask.Segment: + services.AddSingleton, SegmentationParser>(); + break; + + case YoloTask.Classify: + services.AddSingleton, ClassificationParser>(); + break; + } + + return services; + } + + public void Dispose() + { + if (_disposed) + { + return; + } + + _provider.Dispose(); + + 
foreach (var provider in _providers.Values) + { + provider.Dispose(); + } + + _disposed = true; + } +} \ No newline at end of file diff --git a/Source/YoloV8/Predictor/YoloConfiguration.cs b/Source/YoloV8/Predictor/YoloConfiguration.cs new file mode 100644 index 0000000..4702e0d --- /dev/null +++ b/Source/YoloV8/Predictor/YoloConfiguration.cs @@ -0,0 +1,67 @@ +namespace Compunet.YoloV8; + +/// +/// Configuration settings for the YoloPredictor. +/// +public class YoloConfiguration : IEquatable +{ + /// + /// Default YOLO configuration. + /// + public static readonly YoloConfiguration Default = new(); + + /// + /// Specify the minimum confidence value for including a result. Default is 0.3f. + /// + public float Confidence { get; set; } = .3f; + + /// + /// Specify the minimum IoU value for Non-Maximum Suppression (NMS). Default is 0.45f. + /// + public float IoU { get; set; } = .45f; + + /// + /// Specify whether to keep the image aspect ratio when resizing. Default is true. + /// + public bool KeepAspectRatio { get; set; } = true; + + /// + /// Specify whether to skip automatic image orientation correction on load (can improve performance). Default is false. + /// + public bool SkipImageAutoOrient { get; set; } = false; + + /// + /// Specify whether to suppress parallel inference (pre-processing and post-processing will run in parallelly). Default is false. + /// + public bool SuppressParallelInference { get; set; } = false; + + public bool Equals(YoloConfiguration? other) + { + if (other is null) + { + return false; + } + + if (ReferenceEquals(this, other)) + { + return true; + } + + return Confidence == other.Confidence + && IoU == other.IoU + && KeepAspectRatio == other.KeepAspectRatio + && SkipImageAutoOrient == other.SkipImageAutoOrient + && SuppressParallelInference == other.SuppressParallelInference; + } + + public override bool Equals(object? 
obj) => Equals(obj as YoloConfiguration); + + public override int GetHashCode() + { + return Confidence.GetHashCode() + ^ IoU.GetHashCode() + ^ KeepAspectRatio.GetHashCode() + ^ SkipImageAutoOrient.GetHashCode() + ^ SuppressParallelInference.GetHashCode(); + } +} \ No newline at end of file diff --git a/Source/YoloV8/Predictor/YoloPredictor.cs b/Source/YoloV8/Predictor/YoloPredictor.cs new file mode 100644 index 0000000..3903899 --- /dev/null +++ b/Source/YoloV8/Predictor/YoloPredictor.cs @@ -0,0 +1,120 @@ +namespace Compunet.YoloV8; + +public class YoloPredictor : IDisposable +{ + private readonly ServiceResolver _resolver; + private readonly InferenceSession _session; + + private bool _disposed; + + public YoloMetadata Metadata { get; } + + public YoloConfiguration Configuration { get; } + + #region Constractor + + public YoloPredictor(string path) : this(File.ReadAllBytes(path), YoloPredictorOptions.Default) { } + + public YoloPredictor(byte[] model) : this(model, YoloPredictorOptions.Default) { } + + public YoloPredictor(string path, YoloPredictorOptions options) : this(File.ReadAllBytes(path), options) { } + + public YoloPredictor(byte[] model, YoloPredictorOptions options) + { + // Create onnx runtime inference session + _session = options.CreateSession(model); + + // Create predictor services resolver + _resolver = new ServiceResolver(_session, options.Configuration ?? YoloConfiguration.Default); + + Metadata = _resolver.Resolve(); + Configuration = _resolver.Resolve(); + } + + #endregion + + #region Predict + + internal YoloResult Predict(Image image, YoloConfiguration? 
configuration) where T : IYoloPrediction + { + // Validate the model task + ValidateTask(); + + // Resolve runner service + var runner = _resolver.Resolve(); + + // Run the model (include pre-process) + using var output = runner.PreprocessAndRun(image, out var timer); + + // Start postprocess timer + timer.StartPostprocess(); + + // Resolve the parser + var parser = _resolver.Resolve>(configuration); + + // Parse the tensor to result + var result = parser.ProcessTensorToResult(output, image.Size); + + // Create YoloResult + return new YoloResult(result) + { + Speed = timer.Stop(), + ImageSize = image.Size, + }; + } + + #endregion + + internal T ResolveService(YoloConfiguration? configuration) where T : notnull => _resolver.Resolve(configuration); + + private void ValidateTask() where T : IYoloPrediction + { + YoloTask task; + + if (typeof(T) == typeof(Pose)) + { + task = YoloTask.Pose; + } + else if (typeof(T) == typeof(Detection)) + { + task = YoloTask.Detect; + } + else if (typeof(T) == typeof(ObbDetection)) + { + task = YoloTask.Obb; + } + else if (typeof(T) == typeof(Segmentation)) + { + task = YoloTask.Segment; + } + else if (typeof(T) == typeof(Classification)) + { + task = YoloTask.Classify; + } + else + { + throw new InvalidOperationException(); + } + + var currentTask = Metadata.Task; + + if (currentTask != task) + { + throw new InvalidOperationException($"The loaded model does not support this task (expected: '{task.ToString().ToLower()}' actual: '{currentTask.ToString().ToLower()}')"); + } + } + + public void Dispose() + { + if (_disposed) + { + return; + } + + _session.Dispose(); + _resolver.Dispose(); + _disposed = true; + + GC.SuppressFinalize(this); + } +} \ No newline at end of file diff --git a/Source/YoloV8/Predictor/YoloPredictorAsyncExtensions.cs b/Source/YoloV8/Predictor/YoloPredictorAsyncExtensions.cs new file mode 100644 index 0000000..5b99b30 --- /dev/null +++ b/Source/YoloV8/Predictor/YoloPredictorAsyncExtensions.cs @@ -0,0 +1,129 @@ 
+namespace Compunet.YoloV8; + +public static class YoloPredictorAsyncExtensions +{ + private static readonly DecoderOptions _skipMetadataOptions = new() + { + SkipMetadata = true, + }; + + #region Predict Image From Path + + public static Task> PoseAsync(this YoloPredictor predictor, string path, YoloConfiguration? configuration = null) + => PoseAsync(predictor, LoadImage(path, configuration ?? predictor.Configuration), configuration); + + public static Task> DetectAsync(this YoloPredictor predictor, string path, YoloConfiguration? configuration = null) + => DetectAsync(predictor, LoadImage(path, configuration ?? predictor.Configuration), configuration); + + public static Task> DetectObbAsync(this YoloPredictor predictor, string path, YoloConfiguration? configuration = null) + => DetectObbAsync(predictor, LoadImage(path, configuration ?? predictor.Configuration), configuration); + + public static Task> SegmentAsync(this YoloPredictor predictor, string path, YoloConfiguration? configuration = null) + => SegmentAsync(predictor, LoadImage(path, configuration ?? predictor.Configuration), configuration); + + public static Task> ClassifyAsync(this YoloPredictor predictor, string path, YoloConfiguration? configuration = null) + => ClassifyAsync(predictor, LoadImage(path, configuration ?? predictor.Configuration), configuration); + + #endregion + + #region Predict Image From Stream + + public static Task> PoseAsync(this YoloPredictor predictor, Stream stream, YoloConfiguration? configuration = null) + => PoseAsync(predictor, LoadImage(stream, configuration ?? predictor.Configuration), configuration); + + public static Task> DetectAsync(this YoloPredictor predictor, Stream stream, YoloConfiguration? configuration = null) + => DetectAsync(predictor, LoadImage(stream, configuration ?? predictor.Configuration), configuration); + + public static Task> DetectObbAsync(this YoloPredictor predictor, Stream stream, YoloConfiguration? 
configuration = null) + => DetectObbAsync(predictor, LoadImage(stream, configuration ?? predictor.Configuration), configuration); + + public static Task> SegmentAsync(this YoloPredictor predictor, Stream stream, YoloConfiguration? configuration = null) + => SegmentAsync(predictor, LoadImage(stream, configuration ?? predictor.Configuration), configuration); + + public static Task> ClassifyAsync(this YoloPredictor predictor, Stream stream, YoloConfiguration? configuration = null) + => ClassifyAsync(predictor, LoadImage(stream, configuration ?? predictor.Configuration), configuration); + + #endregion + + #region Predict Image From Buffer + + public static Task> PoseAsync(this YoloPredictor predictor, byte[] buffer, YoloConfiguration? configuration = null) + => PoseAsync(predictor, LoadImage(buffer, configuration ?? predictor.Configuration), configuration); + + public static Task> DetectAsync(this YoloPredictor predictor, byte[] buffer, YoloConfiguration? configuration = null) + => DetectAsync(predictor, LoadImage(buffer, configuration ?? predictor.Configuration), configuration); + + public static Task> DetectObbAsync(this YoloPredictor predictor, byte[] buffer, YoloConfiguration? configuration = null) + => DetectObbAsync(predictor, LoadImage(buffer, configuration ?? predictor.Configuration), configuration); + + public static Task> SegmentAsync(this YoloPredictor predictor, byte[] buffer, YoloConfiguration? configuration = null) + => SegmentAsync(predictor, LoadImage(buffer, configuration ?? predictor.Configuration), configuration); + + public static Task> ClassifyAsync(this YoloPredictor predictor, byte[] buffer, YoloConfiguration? configuration = null) + => ClassifyAsync(predictor, LoadImage(buffer, configuration ?? predictor.Configuration), configuration); + + #endregion + + #region Predict Image + + public static Task> PoseAsync(this YoloPredictor predictor, Image image, YoloConfiguration? 
configuration = null) + => PoseAsync(predictor, image.As(), configuration); + + public static Task> DetectAsync(this YoloPredictor predictor, Image image, YoloConfiguration? configuration = null) + => DetectAsync(predictor, image.As(), configuration); + + public static Task> DetectObbAsync(this YoloPredictor predictor, Image image, YoloConfiguration? configuration = null) + => DetectObbAsync(predictor, image.As(), configuration); + + public static Task> SegmentAsync(this YoloPredictor predictor, Image image, YoloConfiguration? configuration = null) + => SegmentAsync(predictor, image.As(), configuration); + + public static Task> ClassifyAsync(this YoloPredictor predictor, Image image, YoloConfiguration? configuration = null) + => ClassifyAsync(predictor, image.As(), configuration); + + #endregion + + #region Predict Async Image + + public static Task> PoseAsync(this YoloPredictor predictor, Image image, YoloConfiguration? configuration = null) + => Task.Run(() => predictor.Pose(image, configuration)); + + public static Task> DetectAsync(this YoloPredictor predictor, Image image, YoloConfiguration? configuration = null) + => Task.Run(() => predictor.Detect(image, configuration)); + + public static Task> DetectObbAsync(this YoloPredictor predictor, Image image, YoloConfiguration? configuration = null) + => Task.Run(() => predictor.DetectObb(image, configuration)); + + public static Task> SegmentAsync(this YoloPredictor predictor, Image image, YoloConfiguration? configuration = null) + => Task.Run(() => predictor.Segment(image, configuration)); + + public static Task> ClassifyAsync(this YoloPredictor predictor, Image image, YoloConfiguration? configuration = null) + => Task.Run(() => predictor.Classify(image, configuration)); + + #endregion + + #region LoadImage + + private static Image LoadImage(string path, YoloConfiguration configuration) + { + return configuration.SkipImageAutoOrient + ? 
Image.Load(_skipMetadataOptions, path) + : Image.Load(path); + } + + private static Image LoadImage(Stream stream, YoloConfiguration configuration) + { + return configuration.SkipImageAutoOrient + ? Image.Load(_skipMetadataOptions, stream) + : Image.Load(stream); + } + + private static Image LoadImage(byte[] buffer, YoloConfiguration configuration) + { + return configuration.SkipImageAutoOrient + ? Image.Load(_skipMetadataOptions, buffer) + : Image.Load(buffer); + } + + #endregion +} \ No newline at end of file diff --git a/Source/YoloV8/Predictor/YoloPredictorExtensions.cs b/Source/YoloV8/Predictor/YoloPredictorExtensions.cs new file mode 100644 index 0000000..f3579db --- /dev/null +++ b/Source/YoloV8/Predictor/YoloPredictorExtensions.cs @@ -0,0 +1,124 @@ +namespace Compunet.YoloV8; + +public static class YoloPredictorExtensions +{ + private static readonly DecoderOptions _skipMetadataOptions = new() + { + SkipMetadata = true, + }; + + #region Predict Image From Path + + public static YoloResult Pose(this YoloPredictor predictor, string path, YoloConfiguration? configuration = null) + => Pose(predictor, LoadImage(path, configuration ?? predictor.Configuration), configuration); + + public static YoloResult Detect(this YoloPredictor predictor, string path, YoloConfiguration? configuration = null) + => Detect(predictor, LoadImage(path, configuration ?? predictor.Configuration), configuration); + + public static YoloResult DetectObb(this YoloPredictor predictor, string path, YoloConfiguration? configuration = null) + => DetectObb(predictor, LoadImage(path, configuration ?? predictor.Configuration), configuration); + + public static YoloResult Segment(this YoloPredictor predictor, string path, YoloConfiguration? configuration = null) + => Segment(predictor, LoadImage(path, configuration ?? predictor.Configuration), configuration); + + public static YoloResult Classify(this YoloPredictor predictor, string path, YoloConfiguration? 
configuration = null) + => Classify(predictor, LoadImage(path, configuration ?? predictor.Configuration), configuration); + + #endregion + + #region Predict Image From Stream + + public static YoloResult Pose(this YoloPredictor predictor, Stream stream, YoloConfiguration? configuration = null) + => Pose(predictor, LoadImage(stream, configuration ?? predictor.Configuration), configuration); + + public static YoloResult Detect(this YoloPredictor predictor, Stream stream, YoloConfiguration? configuration = null) + => Detect(predictor, LoadImage(stream, configuration ?? predictor.Configuration), configuration); + + public static YoloResult DetectObb(this YoloPredictor predictor, Stream stream, YoloConfiguration? configuration = null) + => DetectObb(predictor, LoadImage(stream, configuration ?? predictor.Configuration), configuration); + + public static YoloResult Segment(this YoloPredictor predictor, Stream stream, YoloConfiguration? configuration = null) + => Segment(predictor, LoadImage(stream, configuration ?? predictor.Configuration), configuration); + + public static YoloResult Classify(this YoloPredictor predictor, Stream stream, YoloConfiguration? configuration = null) + => Classify(predictor, LoadImage(stream, configuration ?? predictor.Configuration), configuration); + + #endregion + + #region Predict Image From Buffer + + public static YoloResult Pose(this YoloPredictor predictor, byte[] buffer, YoloConfiguration? configuration = null) + => Pose(predictor, LoadImage(buffer, configuration ?? predictor.Configuration), configuration); + + public static YoloResult Detect(this YoloPredictor predictor, byte[] buffer, YoloConfiguration? configuration = null) + => Detect(predictor, LoadImage(buffer, configuration ?? predictor.Configuration), configuration); + + public static YoloResult DetectObb(this YoloPredictor predictor, byte[] buffer, YoloConfiguration? configuration = null) + => DetectObb(predictor, LoadImage(buffer, configuration ?? 
predictor.Configuration), configuration); + + public static YoloResult Segment(this YoloPredictor predictor, byte[] buffer, YoloConfiguration? configuration = null) + => Segment(predictor, LoadImage(buffer, configuration ?? predictor.Configuration), configuration); + + public static YoloResult Classify(this YoloPredictor predictor, byte[] buffer, YoloConfiguration? configuration = null) + => Classify(predictor, LoadImage(buffer, configuration ?? predictor.Configuration), configuration); + + #endregion + + #region Predict Image + + public static YoloResult Pose(this YoloPredictor predictor, Image image, YoloConfiguration? configuration = null) => Pose(predictor, image.As(), configuration); + + public static YoloResult Detect(this YoloPredictor predictor, Image image, YoloConfiguration? configuration = null) => Detect(predictor, image.As(), configuration); + + public static YoloResult DetectObb(this YoloPredictor predictor, Image image, YoloConfiguration? configuration = null) => DetectObb(predictor, image.As(), configuration); + + public static YoloResult Segment(this YoloPredictor predictor, Image image, YoloConfiguration? configuration = null) => Segment(predictor, image.As(), configuration); + + public static YoloResult Classify(this YoloPredictor predictor, Image image, YoloConfiguration? configuration = null) => Classify(predictor, image.As(), configuration); + + #endregion + + #region Predict Image + + public static YoloResult Pose(this YoloPredictor predictor, Image image, YoloConfiguration? configuration = null) + => predictor.Predict(image, configuration); + + public static YoloResult Detect(this YoloPredictor predictor, Image image, YoloConfiguration? configuration = null) + => predictor.Predict(image, configuration); + + public static YoloResult DetectObb(this YoloPredictor predictor, Image image, YoloConfiguration? 
configuration = null) + => predictor.Predict(image, configuration); + + public static YoloResult Segment(this YoloPredictor predictor, Image image, YoloConfiguration? configuration = null) + => predictor.Predict(image, configuration); + + public static YoloResult Classify(this YoloPredictor predictor, Image image, YoloConfiguration? configuration = null) + => predictor.Predict(image, configuration); + + #endregion + + #region LoadImage + + private static Image LoadImage(string path, YoloConfiguration configuration) + { + return configuration.SkipImageAutoOrient + ? Image.Load(_skipMetadataOptions, path) + : Image.Load(path); + } + + private static Image LoadImage(Stream stream, YoloConfiguration configuration) + { + return configuration.SkipImageAutoOrient + ? Image.Load(_skipMetadataOptions, stream) + : Image.Load(stream); + } + + private static Image LoadImage(byte[] buffer, YoloConfiguration configuration) + { + return configuration.SkipImageAutoOrient + ? Image.Load(_skipMetadataOptions, buffer) + : Image.Load(buffer); + } + + #endregion +} \ No newline at end of file diff --git a/Source/YoloV8/Predictor/YoloPredictorOptions.cs b/Source/YoloV8/Predictor/YoloPredictorOptions.cs new file mode 100644 index 0000000..33f42d3 --- /dev/null +++ b/Source/YoloV8/Predictor/YoloPredictorOptions.cs @@ -0,0 +1,34 @@ +namespace Compunet.YoloV8; + +public class YoloPredictorOptions +{ + public static YoloPredictorOptions Default { get; } = new(); + + public bool UseCuda { get; init; } + + public int CudaDeviceId { get; init; } + + public SessionOptions? SessionOptions { get; init; } + + public YoloConfiguration? 
Configuration { get; init; } + + internal InferenceSession CreateSession(byte[] model) + { + if (UseCuda) + { + if (SessionOptions is not null) + { + throw new InvalidOperationException("'UseCuda' and 'SessionOptions' cannot be used together"); + } + + return new InferenceSession(model, SessionOptions.MakeSessionOptionWithCudaProvider(CudaDeviceId)); + } + + if (SessionOptions != null) + { + return new InferenceSession(model, SessionOptions); + } + + return new InferenceSession(model); + } +} diff --git a/Source/YoloV8/Selectors/BinarySelector.cs b/Source/YoloV8/Selectors/BinarySelector.cs deleted file mode 100644 index 91e5bcb..0000000 --- a/Source/YoloV8/Selectors/BinarySelector.cs +++ /dev/null @@ -1,33 +0,0 @@ -namespace Compunet.YoloV8; - -public class BinarySelector -{ - private readonly Func _factory; - - public BinarySelector(string path) - { - _factory = () => File.ReadAllBytes(path); - } - - public BinarySelector(byte[] data) - { - _factory = () => data; - } - - public BinarySelector(Stream stream) - { - _factory = () => - { - using var memory = new MemoryStream(); - stream.CopyTo(memory); - - return memory.ToArray(); - }; - } - - internal byte[] Load() => _factory(); - - public static implicit operator BinarySelector(string path) => new(path); - public static implicit operator BinarySelector(byte[] data) => new(data); - public static implicit operator BinarySelector(Stream stream) => new(stream); -} \ No newline at end of file diff --git a/Source/YoloV8/Selectors/ImageSelector.cs b/Source/YoloV8/Selectors/ImageSelector.cs deleted file mode 100644 index 24d7dad..0000000 --- a/Source/YoloV8/Selectors/ImageSelector.cs +++ /dev/null @@ -1,24 +0,0 @@ -namespace Compunet.YoloV8; - -public class ImageSelector : ImageSelector -{ - public ImageSelector(Image image) - : base(image) { } - - public ImageSelector(string path) - : base(path) { } - - public ImageSelector(byte[] data) - : base(data) { } - - public ImageSelector(Stream stream) - : base(stream) { } - - 
public static implicit operator ImageSelector(Image image) => new(image); - - public static implicit operator ImageSelector(string path) => new(path); - - public static implicit operator ImageSelector(byte[] data) => new(data); - - public static implicit operator ImageSelector(Stream stream) => new(stream); -} diff --git a/Source/YoloV8/Selectors/ImageSelector{TPixel}.cs b/Source/YoloV8/Selectors/ImageSelector{TPixel}.cs deleted file mode 100644 index 79672c5..0000000 --- a/Source/YoloV8/Selectors/ImageSelector{TPixel}.cs +++ /dev/null @@ -1,41 +0,0 @@ -namespace Compunet.YoloV8; - -public class ImageSelector where TPixel : unmanaged, IPixel -{ - private readonly Func> _factory; - - public ImageSelector(Image image) - { - _factory = image.CloneAs; - } - - public ImageSelector(string path) - { - _factory = () => Image.Load(path); - } - - public ImageSelector(byte[] data) - { - _factory = () => Image.Load(data); - } - - public ImageSelector(Stream stream) - { - _factory = () => Image.Load(stream); - } - - internal Image Load(bool autoOrient) - { - var image = _factory(); - - if (autoOrient) - image.Mutate(x => x.AutoOrient()); - - return image; - } - - public static implicit operator ImageSelector(Image image) => new(image); - public static implicit operator ImageSelector(string path) => new(path); - public static implicit operator ImageSelector(byte[] data) => new(data); - public static implicit operator ImageSelector(Stream stream) => new(stream); -} \ No newline at end of file diff --git a/Source/YoloV8/Services/MemoryAllocatorService.cs b/Source/YoloV8/Services/MemoryAllocatorService.cs new file mode 100644 index 0000000..e15758b --- /dev/null +++ b/Source/YoloV8/Services/MemoryAllocatorService.cs @@ -0,0 +1,51 @@ +namespace Compunet.YoloV8.Services; + +internal class MemoryAllocatorService : IMemoryAllocatorService +{ + #region ArrayMemoryPoolBuffer + + private class ArrayMemoryPoolBuffer : IMemoryOwner + { + private readonly int _length; + + private T[]? 
_buffer; + + public Memory Memory + { + get + { + ObjectDisposedException.ThrowIf(_buffer == null, this); + return new Memory(_buffer, 0, _length); + } + } + + public ArrayMemoryPoolBuffer(int length, bool clean) + { + var source = ArrayPool.Shared.Rent(length); + + if (clean) + { + Array.Clear(source, 0, length); + } + + _length = length; + _buffer = source; + } + + public void Dispose() + { + if (_buffer != null) + { + ArrayPool.Shared.Return(_buffer); + _buffer = null; + } + } + } + + #endregion + + public IMemoryOwner Allocate(int length, bool clean = false) + { + return new ArrayMemoryPoolBuffer(length, clean); + } +} \ No newline at end of file diff --git a/Source/YoloV8/Services/NonMaxSuppressionService.cs b/Source/YoloV8/Services/NonMaxSuppressionService.cs new file mode 100644 index 0000000..4779f73 --- /dev/null +++ b/Source/YoloV8/Services/NonMaxSuppressionService.cs @@ -0,0 +1,53 @@ +namespace Compunet.YoloV8.Services; + +internal class NonMaxSuppressionService : INonMaxSuppressionService +{ + public T[] Suppress(Span boxes, float iouThreshold) where T : IRawBoundingBox + { + if (boxes.Length == 0) + { + return []; + } + + // Sort by confidence, from highest to lowest + boxes.Sort((x, y) => y.CompareTo(x)); + + // Initialize result with highest confidence box + var result = new List(4) + { + boxes[0] + }; + + // Iterate boxes (skip the first box because it has already been added) + for (var i = 1; i < boxes.Length; i++) + { + var box1 = boxes[i]; + var addToResult = true; + + for (var j = 0; j < result.Count; j++) + { + var box2 = result[j]; + + // Skip boxes with a different label + if (box1.Name != box2.Name) + { + continue; + } + + // If the box overlaps another box already in the results + if (T.CalculateIoU(ref box1, ref box2) > iouThreshold) + { + addToResult = false; + break; + } + } + + if (addToResult) + { + result.Add(box1); + } + } + + return [.. 
result]; + } +} \ No newline at end of file diff --git a/Source/YoloV8/Services/Parsers/ClassificationParser.cs b/Source/YoloV8/Services/Parsers/ClassificationParser.cs new file mode 100644 index 0000000..bdc7072 --- /dev/null +++ b/Source/YoloV8/Services/Parsers/ClassificationParser.cs @@ -0,0 +1,27 @@ +namespace Compunet.YoloV8.Services; + +internal class ClassificationParser(YoloMetadata metadata) : IParser +{ + public Classification[] ProcessTensorToResult(YoloRawOutput tensor, Size size) + { + var tensorSpan = tensor.Output0.Buffer.Span; + + var result = new Classification[tensorSpan.Length]; + + for (var i = 0; i < tensorSpan.Length; i++) + { + var name = metadata.Names[i]; + var confidence = tensorSpan[i]; + + result[i] = new Classification + { + Name = name, + Confidence = confidence, + }; + } + + result.AsSpan().Sort((x, y) => y.Confidence.CompareTo(x.Confidence)); + + return result; + } +} \ No newline at end of file diff --git a/Source/YoloV8/Services/Parsers/DetectionParser.cs b/Source/YoloV8/Services/Parsers/DetectionParser.cs new file mode 100644 index 0000000..a2b5276 --- /dev/null +++ b/Source/YoloV8/Services/Parsers/DetectionParser.cs @@ -0,0 +1,25 @@ +namespace Compunet.YoloV8.Services; + +internal class DetectionParser(IRawBoundingBoxParser rawBoundingBoxParser) : IParser +{ + public Detection[] ProcessTensorToResult(YoloRawOutput output, Size size) + { + var boxes = rawBoundingBoxParser.Parse(output.Output0, size); + + var result = new Detection[boxes.Length]; + + for (var i = 0; i < boxes.Length; i++) + { + var box = boxes[i]; + + result[i] = new Detection + { + Name = box.Name, + Bounds = box.Bounds, + Confidence = box.Confidence, + }; + } + + return result; + } +} \ No newline at end of file diff --git a/Source/YoloV8/Services/Parsers/ObbDetectionParser.cs b/Source/YoloV8/Services/Parsers/ObbDetectionParser.cs new file mode 100644 index 0000000..7aaaa33 --- /dev/null +++ b/Source/YoloV8/Services/Parsers/ObbDetectionParser.cs @@ -0,0 +1,26 @@ 
+namespace Compunet.YoloV8.Services; + +internal class ObbDetectionParser(IRawBoundingBoxParser rawBoundingBoxParser) : IParser +{ + public ObbDetection[] ProcessTensorToResult(YoloRawOutput output, Size size) + { + var boxes = rawBoundingBoxParser.Parse(output.Output0, size); + + var result = new ObbDetection[boxes.Length]; + + for (var i = 0; i < boxes.Length; i++) + { + var box = boxes[i]; + + result[i] = new ObbDetection + { + Name = box.Name, + Angle = box.Angle, + Bounds = box.Bounds, + Confidence = box.Confidence, + }; + } + + return result; + } +} \ No newline at end of file diff --git a/Source/YoloV8/Services/Parsers/PoseParser.cs b/Source/YoloV8/Services/Parsers/PoseParser.cs new file mode 100644 index 0000000..9d16fbe --- /dev/null +++ b/Source/YoloV8/Services/Parsers/PoseParser.cs @@ -0,0 +1,83 @@ +namespace Compunet.YoloV8.Services; + +internal class PoseParser(YoloPoseMetadata metadata, + YoloConfiguration configuration, + IRawBoundingBoxParser rawBoundingBoxParser) : IParser +{ + public Pose[] ProcessTensorToResult(YoloRawOutput output, Size size) + { + int xPadding; + int yPadding; + + var xRatio = (float)size.Width / metadata.ImageSize.Width; + var yRatio = (float)size.Height / metadata.ImageSize.Height; + + if (configuration.KeepAspectRatio) + { + var reductionRatio = Math.Min(metadata.ImageSize.Width / (float)size.Width, metadata.ImageSize.Height / (float)size.Height); + + xPadding = (int)((metadata.ImageSize.Width - size.Width * reductionRatio) / 2); + yPadding = (int)((metadata.ImageSize.Height - size.Height * reductionRatio) / 2); + + var maxRatio = Math.Max(xRatio, yRatio); + + xRatio = maxRatio; + yRatio = maxRatio; + } + else + { + xPadding = 0; + yPadding = 0; + } + + return ProcessTensorToResult(output.Output0, new Vector(xPadding, yPadding), new Vector(xRatio, yRatio)); + } + + private Pose[] ProcessTensorToResult(DenseTensor tensor, Vector padding, Vector ratio) + { + var boxes = rawBoundingBoxParser.Parse(tensor, padding, ratio); + + 
var shape = metadata.KeypointShape; + var result = new Pose[boxes.Length]; + + var tensorSpan = tensor.Buffer.Span; + var boxInfoStride = tensor.Strides[1]; + + for (var i = 0; i < boxes.Length; i++) + { + var box = boxes[i]; + var keypoints = new Keypoint[shape.Count]; + + for (var index = 0; index < shape.Count; index++) + { + var offset = index * shape.Channels + 4 + metadata.Names.Length; + + var pointX = (int)((tensorSpan[offset * boxInfoStride + box.Index] - padding.X) * ratio.X); + var pointY = (int)((tensorSpan[(offset + 1) * boxInfoStride + box.Index] - padding.Y) * ratio.Y); + + var pointConfidence = metadata.KeypointShape.Channels switch + { + 2 => 1f, + 3 => tensorSpan[(offset + 2) * boxInfoStride + box.Index], + _ => throw new InvalidOperationException("Unexpected keypoint shape") + }; + + keypoints[index] = new Keypoint + { + Index = index, + Point = new Point(pointX, pointY), + Confidence = pointConfidence + }; + } + + result[i] = new Pose(keypoints) + { + Name = box.Name, + Bounds = box.Bounds, + Confidence = box.Confidence, + }; + } + + return result; + } +} \ No newline at end of file diff --git a/Source/YoloV8/Services/Parsers/RawBoundingBoxParser.cs b/Source/YoloV8/Services/Parsers/RawBoundingBoxParser.cs new file mode 100644 index 0000000..df061c7 --- /dev/null +++ b/Source/YoloV8/Services/Parsers/RawBoundingBoxParser.cs @@ -0,0 +1,195 @@ +namespace Compunet.YoloV8.Services; + +internal class RawBoundingBoxParser(YoloMetadata metadata, + YoloConfiguration configuration, + IMemoryAllocatorService memoryAllocator, + INonMaxSuppressionService nonMaxSuppression) : IRawBoundingBoxParser +{ + public T[] Parse(DenseTensor tensor, Size size) where T : IRawBoundingBox + { + var xPadding = 0; + var yPadding = 0; + + if (configuration.KeepAspectRatio) + { + var reductionRatio = Math.Min(metadata.ImageSize.Width / (float)size.Width, + metadata.ImageSize.Height / (float)size.Height); + + xPadding = (int)((metadata.ImageSize.Width - size.Width * 
reductionRatio) / 2); + yPadding = (int)((metadata.ImageSize.Height - size.Height * reductionRatio) / 2); + } + + return Parse(tensor, size, new Vector(xPadding, yPadding)); + } + + public T[] Parse(DenseTensor tensor, Size imageSize, Vector padding) where T : IRawBoundingBox + { + var xRatio = (float)imageSize.Width / metadata.ImageSize.Width; + var yRatio = (float)imageSize.Height / metadata.ImageSize.Height; + + if (configuration.KeepAspectRatio) + { + var maxRatio = Math.Max(xRatio, yRatio); + + xRatio = maxRatio; + yRatio = maxRatio; + } + + return Parse(tensor, padding, new Vector(xRatio, yRatio)); + } + + public T[] Parse(DenseTensor tensor, Vector padding, Vector ratio) where T : IRawBoundingBox + { + if (metadata.Architecture == YoloArchitecture.YoloV10) + { + return ParseYoloV10(tensor, padding, ratio); + } + + return ParseYoloV8(tensor, padding, ratio); + } + + private T[] ParseYoloV8(DenseTensor tensor, Vector padding, Vector ratio) where T : IRawBoundingBox + { + var stride1 = tensor.Strides[1]; + var boxesCount = tensor.Dimensions[2]; + var namesCount = metadata.Names.Length; + + var boxes = memoryAllocator.Allocate(boxesCount); + + var boxesSpan = boxes.Memory.Span; + var tensorSpan = tensor.Buffer.Span; + + var context = new RawParsingContext + { + Tensor = tensor, + Padding = padding, + Ratio = ratio, + Stride1 = stride1, + NameCount = namesCount, + }; + + var nonEmptyBoxesCount = 0; + + for (var boxIndex = 0; boxIndex < boxesCount; boxIndex++) + { + var emptyBox = true; + + for (var nameIndex = 0; nameIndex < namesCount; nameIndex++) + { + var confidence = tensorSpan[(nameIndex + 4) * stride1 + boxIndex]; + + if (confidence <= configuration.Confidence) + { + continue; + } + + emptyBox = false; + + var name = metadata.Names[nameIndex]; + var box = T.Parse(ref context, boxIndex, name, confidence, YoloArchitecture.YoloV8); + + if (box.Bounds.Width == 0 || box.Bounds.Height == 0) + { + continue; + } + + boxesSpan[boxIndex] = box; + } + + if (emptyBox) 
+ { + boxesSpan[boxIndex] = T.Empty; + } + else + { + nonEmptyBoxesCount++; + } + } + + using var nonEmptyBoxes = GetNonEmptyBoxes(boxes.Memory.Span, nonEmptyBoxesCount); + + return nonMaxSuppression.Suppress(nonEmptyBoxes.Memory.Span, configuration.IoU); + } + + private T[] ParseYoloV10(DenseTensor tensor, Vector padding, Vector ratio) where T : IRawBoundingBox + { + var stride1 = tensor.Strides[1]; + var stride2 = tensor.Strides[2]; + + var boxesCount = tensor.Dimensions[1]; + var boxes = memoryAllocator.Allocate(boxesCount); + + var boxesSpan = boxes.Memory.Span; + var tensorSpan = tensor.Buffer.Span; + + var context = new RawParsingContext + { + Tensor = tensor, + Padding = padding, + Ratio = ratio, + Stride1 = stride1 + }; + + var nonEmptyBoxesCount = 0; + + for (var index = 0; index < boxesCount; index++) + { + var boxOffset = index * stride1; + + var confidence = tensorSpan[boxOffset + 4 * stride2]; + + if (confidence <= configuration.Confidence) + { + boxesSpan[index] = T.Empty; + continue; + } + + nonEmptyBoxesCount++; + + var name = metadata.Names[(int)tensorSpan[boxOffset + 5 * stride2]]; + var box = T.Parse(ref context, index, name, confidence, YoloArchitecture.YoloV10); + + if (box.Bounds.Width == 0 || box.Bounds.Height == 0) + { + continue; + } + + boxesSpan[index] = box; + } + + using var nonEmptyBoxes = GetNonEmptyBoxes(boxes.Memory.Span, nonEmptyBoxesCount); + + return nonMaxSuppression.Suppress(nonEmptyBoxes.Memory.Span, configuration.IoU); + } + + private IMemoryOwner GetNonEmptyBoxes(ReadOnlySpan boxes, int count) where T : IRawBoundingBox + { + //var activeCount = 0; + + //// Count the non-empty boxes + //for (var i = 0; i < boxes.Length; i++) + //{ + // if (boxes[i].IsEmpty == false) + // { + // activeCount++; + // } + //} + + var activeIndex = 0; + var activeBoxes = memoryAllocator.Allocate(count); + + for (var i = 0; i < boxes.Length; i++) + { + var box = boxes[i]; + + if (box.IsEmpty) + { + continue; + } + + 
activeBoxes.Memory.Span[activeIndex++] = box; + } + + return activeBoxes; + } +} \ No newline at end of file diff --git a/Source/YoloV8/Services/Parsers/SegmentationParser.cs b/Source/YoloV8/Services/Parsers/SegmentationParser.cs new file mode 100644 index 0000000..ed0bcd8 --- /dev/null +++ b/Source/YoloV8/Services/Parsers/SegmentationParser.cs @@ -0,0 +1,170 @@ +namespace Compunet.YoloV8.Services; + +internal class SegmentationParser(YoloMetadata metadata, + YoloConfiguration configuration, + IMemoryAllocatorService memoryAllocator, + IRawBoundingBoxParser rawBoundingBoxParser) : IParser +{ + public Segmentation[] ProcessTensorToResult(YoloRawOutput output, Size size) + { + var xPadding = 0; + var yPadding = 0; + + if (configuration.KeepAspectRatio) + { + var reductionRatio = Math.Min(metadata.ImageSize.Width / (float)size.Width, + metadata.ImageSize.Height / (float)size.Height); + + xPadding = (int)((metadata.ImageSize.Width - size.Width * reductionRatio) / 2); + yPadding = (int)((metadata.ImageSize.Height - size.Height * reductionRatio) / 2); + } + + var output0 = output.Output0; + var output1 = output.Output1 ?? 
throw new Exception(); + + var boxes = rawBoundingBoxParser.Parse(output0, size); + var maskChannelCount = output0.Dimensions[1] - 4 - metadata.Names.Length; + + var result = new Segmentation[boxes.Length]; + + for (var index = 0; index < boxes.Length; index++) + { + var box = boxes[index]; + + using var maskWeights = CollectMaskWeights(output0, box.Index, maskChannelCount, metadata.Names.Length + 4); + + var mask = ProcessMask(output1, maskWeights.Memory.Span, box.Bounds, size, metadata.ImageSize, xPadding, yPadding); + + result[index] = new Segmentation + { + Mask = mask, + Name = box.Name, + Bounds = box.Bounds, + Confidence = box.Confidence, + }; + } + + return result; + }
+
+    /// <summary>
+    /// Builds the mask of one detected object: blends the prototype masks with the object's
+    /// weights, removes the preprocess padding, scales to the image size and crops to the box.
+    /// </summary>
+    /// <param name="prototypes">Prototype masks, read as [0, channel, y, x].</param>
+    /// <param name="weights">One weight per prototype channel.</param>
+    /// <param name="bounds">Rectangle cropped out of the mask once it has the image size.</param>
+    /// <param name="imageSize">Size the mask is resized to before the final crop.</param>
+    /// <param name="modelSize">Size the padding values are relative to.</param>
+    /// <param name="xPadding">Horizontal preprocess padding.</param>
+    /// <param name="yPadding">Vertical preprocess padding.</param>
+    /// <returns>The mask covering <paramref name="bounds"/>.</returns>
+    /// <exception cref="InvalidOperationException">The number of weights differs from the number of prototype channels.</exception>
+    private static SegmentationMask ProcessMask(Tensor<float> prototypes,
+                                                ReadOnlySpan<float> weights,
+                                                Rectangle bounds,
+                                                Size imageSize,
+                                                Size modelSize,
+                                                int xPadding,
+                                                int yPadding)
+    {
+        var maskChannels = prototypes.Dimensions[1];
+        var maskHeight = prototypes.Dimensions[2];
+        var maskWidth = prototypes.Dimensions[3];
+
+        if (maskChannels != weights.Length)
+        {
+            throw new InvalidOperationException($"Expected {maskChannels} mask weights but got {weights.Length}");
+        }
+
+        using var bitmap = new Image<L8>(maskWidth, maskHeight);
+
+        for (var y = 0; y < maskHeight; y++)
+        {
+            for (var x = 0; x < maskWidth; x++)
+            {
+                var value = 0F;
+
+                // Weighted sum of all prototype channels at this pixel
+                for (var i = 0; i < maskChannels; i++)
+                {
+                    value += prototypes[0, i, y, x] * weights[i];
+                }
+
+                bitmap[x, y] = new L8(GetLuminance(Sigmoid(value)));
+            }
+        }
+
+        // Scale the preprocess padding from model coordinates to mask coordinates
+        var xPad = xPadding * maskWidth / modelSize.Width;
+        var yPad = yPadding * maskHeight / modelSize.Height;
+
+        var paddingCropRectangle = new Rectangle(xPad,
+                                                 yPad,
+                                                 maskWidth - xPad * 2,
+                                                 maskHeight - yPad * 2);
+
+        bitmap.Mutate(x =>
+        {
+            // Crop for preprocess resize padding
+            x.Crop(paddingCropRectangle);
+
+            // Resize to original image size
+            x.Resize(imageSize);
+
+            // Crop for getting the object segmentation only
+            x.Crop(bounds);
+        });
+
+        return CreateMaskFromBitmap(bitmap);
+    }
+
+    /// <summary>
+    /// Copies the mask weights of one box out of the raw output into a newly allocated buffer.
+    /// </summary>
+    /// <param name="output">Raw output, read as [0, channel, box].</param>
+    /// <param name="boxIndex">Index of the box in the last dimension of <paramref name="output"/>.</param>
+    /// <param name="maskChannelCount">Number of weights to copy.</param>
+    /// <param name="maskWeightsOffset">Channel index of the first weight.</param>
+    /// <returns>The buffer holding the weights; the caller disposes it.</returns>
+    private IMemoryOwner<float> CollectMaskWeights(Tensor<float> output, int boxIndex, int maskChannelCount, int maskWeightsOffset)
+    {
+        var weights = memoryAllocator.Allocate<float>(maskChannelCount);
+        var weightsSpan = weights.Memory.Span;
+
+        for (var i = 0; i < maskChannelCount; i++)
+        {
+            weightsSpan[i] = output[0, maskWeightsOffset + i, boxIndex];
+        }
+
+        return weights;
+    }
+
+    /// <summary>
+    /// Converts a luminance bitmap into a confidence mask indexed as [x, y].
+    /// </summary>
+    private static SegmentationMask CreateMaskFromBitmap(Image<L8> bitmap)
+    {
+        var mask = new float[bitmap.Width, bitmap.Height];
+
+        bitmap.ProcessPixelRows(accessor =>
+        {
+            for (var y = 0; y < bitmap.Height; y++)
+            {
+                var row = accessor.GetRowSpan(y);
+
+                for (var x = 0; x < bitmap.Width; x++)
+                {
+                    mask[x, y] = GetConfidence(row[x].PackedValue);
+                }
+            }
+        });
+
+        return new SegmentationMask
+        {
+            Mask = mask
+        };
+    }
+
+    #region Helpers
+
+    /// <summary>Logistic function.</summary>
+    private static float Sigmoid(float value) => 1 / (1 + MathF.Exp(-value));
+
+    /// <summary>Encodes a confidence as an inverted luminance: 1 maps to 0 and 0 maps to 255.</summary>
+    private static byte GetLuminance(float confidence) => (byte)((confidence * 255 - 255) * -1);
+
+    /// <summary>Decodes an inverted luminance back into a confidence: 0 maps to 1 and 255 maps to 0.</summary>
+    private static float GetConfidence(byte luminance) => (luminance - 255) * -1 / 255F;
+
+    #endregion
+}
\ No newline at end of file
diff --git a/Source/YoloV8/Services/PreprocessService.cs b/Source/YoloV8/Services/PreprocessService.cs
new file mode 100644
index 0000000..1f29508
--- /dev/null
+++ b/Source/YoloV8/Services/PreprocessService.cs
@@ -0,0 +1,91 @@
+namespace Compunet.YoloV8.Services;
+
+/// <summary>
+/// Copies the pixels of an image into a model input tensor as normalized RGB planes.
+/// </summary>
+internal class PreprocessService : IPreprocessService
+{
+    /// <summary>
+    /// Verifies that the padded image fits the tensor, then writes its pixels into it.
+    /// </summary>
+    /// <param name="image">Image whose pixels are written.</param>
+    /// <param name="tensor">Target tensor; dimension 1 holds the R, G and B planes, dimension 2 the rows and dimension 3 the columns.</param>
+    /// <param name="padding">Column (X) and row (Y) offset added to every written pixel.</param>
+    /// <exception cref="InvalidOperationException">The padded image does not fit the tensor rows or columns.</exception>
+    public void ProcessImageToTensor(Image<Rgb24> image, DenseTensor<float> tensor, Vector<int> padding)
+    {
+        // Verify tensor dimensions: a mismatch on either axis is an error
+        if (!IsPaddedLengthValid(image.Height, padding.Y, tensor.Dimensions[2]) ||
+            !IsPaddedLengthValid(image.Width, padding.X, tensor.Dimensions[3]))
+        {
+            throw new InvalidOperationException("The image size and target tensor dimensions is not match");
+        }
+
+        // Process core
+        ProcessToTensorCore(image, tensor, padding);
+    }
+
+    /// <summary>
+    /// Checks that one image axis plus its two-sided padding fills the matching tensor axis.
+    /// </summary>
+    private static bool IsPaddedLengthValid(int imageLength, int padding, int tensorLength)
+    {
+        // SessionRunnerService.ResizeImage halves the free space with integer division, so an odd
+        // amount of free space leaves the padded image exactly one pixel short of the tensor
+        var remainder = tensorLength - (imageLength + (padding * 2));
+
+        return padding >= 0 && remainder is 0 or 1;
+    }
+
+    /// <summary>
+    /// Writes every pixel of the image into the tensor buffer, one row at a time.
+    /// </summary>
+    private static void ProcessToTensorCore(Image<Rgb24> image, DenseTensor<float> tensor, Vector<int> padding)
+    {
+        var width = image.Width;
+        var height = image.Height;
+
+        // Pre-calculate strides for performance
+        var strideY = tensor.Strides[2];
+        var strideX = tensor.Strides[3];
+        var strideR = tensor.Strides[1] * 0;
+        var strideG = tensor.Strides[1] * 1;
+        var strideB = tensor.Strides[1] * 2;
+
+        // Get a span of the whole tensor for fast access
+        var tensorSpan = tensor.Buffer.Span;
+
+        // Try get contiguous memory block of the entire image data
+        var isContiguous = image.DangerousTryGetSinglePixelMemory(out var memory);
+
+        for (var y = 0; y < height; y++)
+        {
+            // Slice the row out of the contiguous block when there is one, otherwise fetch it by itself
+            var rowSpan = isContiguous
+                ? memory.Span.Slice(y * width, width)
+                : image.DangerousGetPixelRowMemory(y).Span;
+
+            var tensorYIndex = strideR + strideY * (y + padding.Y);
+
+            for (var x = 0; x < width; x++)
+            {
+                var tensorIndex = tensorYIndex + strideX * (x + padding.X);
+                var pixel = rowSpan[x];
+
+                WritePixel(tensorSpan, tensorIndex, pixel, strideR, strideG, strideB);
+            }
+        }
+    }
+
+    /// <summary>
+    /// Writes one pixel as three floats in [0, 1], one per color plane.
+    /// </summary>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private static void WritePixel(Span<float> target, int index, Rgb24 pixel, int strideBatchR, int strideBatchG, int strideBatchB)
+    {
+        // index already points at the red plane, so the other planes are reached relative to it
+        target[index] = pixel.R / 255f;
+        target[index + strideBatchG - strideBatchR] = pixel.G / 255f;
+        target[index + strideBatchB - strideBatchR] = pixel.B / 255f;
+    }
+}
\ No newline at end of file
diff --git a/Source/YoloV8/Services/SerssionRunnerService.cs b/Source/YoloV8/Services/SerssionRunnerService.cs
new file mode 100644
index 0000000..ac59511
--- /dev/null
+++
b/Source/YoloV8/Services/SerssionRunnerService.cs
@@ -0,0 +1,191 @@
+namespace Compunet.YoloV8.Services;
+
+/// <summary>
+/// Preprocesses an image into the model input, runs the inference session through an io binding
+/// and returns the raw output buffers.
+/// </summary>
+internal class SessionRunnerService(InferenceSession session,
+                                    SessionTensorInfo tensorInfo,
+                                    YoloConfiguration configuration,
+                                    YoloMetadata metadata,
+                                    IPreprocessService preprocess,
+                                    IMemoryAllocatorService memoryAllocator) : ISessionRunnerService
+{
+    private readonly object _lock = new();
+    private readonly RunOptions _runOptions = new();
+
+    /// <summary>
+    /// Preprocesses <paramref name="image"/>, runs the model and returns its raw output.
+    /// </summary>
+    /// <param name="image">Input image; it is auto-oriented in place unless the configuration skips that step.</param>
+    /// <param name="timer">Timer created here and started for the preprocess and inference phases.</param>
+    /// <returns>The output tensors bound for the run.</returns>
+    public YoloRawOutput PreprocessAndRun(Image<Rgb24> image, out PredictorTimer timer)
+    {
+        // Create timer
+        timer = new PredictorTimer();
+
+        // Create io binding
+        using var binding = session.CreateIoBinding();
+
+        // Resources needed only until the run completes: the ort values, which keep the managed
+        // buffers they wrap pinned until disposed, and the rented input tensor
+        var temporaries = new List<IDisposable>();
+
+        try
+        {
+            // Create and bind raw output
+            var output = CreateRawOutput(binding, temporaries);
+
+            // Start pre-process timer
+            timer.StartPreprocess();
+
+            // Preprocess image to tensor and bind to ort binding
+            ProcessInput(image, binding, temporaries);
+
+            // Start inference timer
+            timer.StartInference();
+
+            // Run the model
+            if (configuration.SuppressParallelInference)
+            {
+                lock (_lock)
+                {
+                    session.RunWithBinding(_runOptions, binding);
+                }
+            }
+            else
+            {
+                session.RunWithBinding(_runOptions, binding);
+            }
+
+            // Return the yolo raw output
+            return output;
+        }
+        finally
+        {
+            // Release in reverse order so each ort value is disposed before the buffer it wraps
+            for (var i = temporaries.Count - 1; i >= 0; i--)
+            {
+                temporaries[i].Dispose();
+            }
+        }
+    }
+
+    /// <summary>
+    /// Allocates the output tensors and binds them to <paramref name="binding"/>.
+    /// </summary>
+    /// <param name="binding">Binding that receives the outputs.</param>
+    /// <param name="temporaries">Collects the created ort values so the caller can dispose them after the run.</param>
+    private YoloRawOutput CreateRawOutput(OrtIoBinding binding, List<IDisposable> temporaries)
+    {
+        var output0Info = tensorInfo.Output0;
+        var output1Info = tensorInfo.Output1;
+
+        // Allocate output0 tensor buffer
+        var output0 = memoryAllocator.AllocateTensor<float>(output0Info);
+
+        // Bind tensor buffer to ort binding
+        var ortOutput0 = CreateOrtValue(output0.Tensor.Buffer, output0Info.Dimensions64);
+        temporaries.Add(ortOutput0);
+        binding.BindOutput(session.OutputNames[0], ortOutput0);
+
+        if (output1Info != null)
+        {
+            // Allocate output1 tensor buffer
+            var output1 = memoryAllocator.AllocateTensor<float>(output1Info.Value);
+
+            // Bind tensor buffer to ort binding
+            var ortOutput1 = CreateOrtValue(output1.Tensor.Buffer, output1Info.Value.Dimensions64);
+            temporaries.Add(ortOutput1);
+            binding.BindOutput(session.OutputNames[1], ortOutput1);
+
+            return new YoloRawOutput(output0, output1);
+        }
+
+        return new YoloRawOutput(output0, null);
+    }
+
+    #region Preprocess
+
+    /// <summary>
+    /// Orients and resizes the image, writes it into a rented input tensor and binds that tensor.
+    /// </summary>
+    /// <param name="image">Image to preprocess.</param>
+    /// <param name="binding">Binding that receives the input.</param>
+    /// <param name="temporaries">Collects the input tensor and its ort value; both must stay alive until the run completes.</param>
+    private void ProcessInput(Image<Rgb24> image, OrtIoBinding binding, List<IDisposable> temporaries)
+    {
+        // Apply auto orient if required
+        if (configuration.SkipImageAutoOrient == false)
+        {
+            image.AutoOrient();
+        }
+
+        // Resize the input image
+        using var resized = ResizeImage(image, out var padding);
+
+        // Rent the input tensor
+        // NOTE(review): relies on the owner returned by AllocateTensor being IDisposable - confirm
+        var inputTensor = memoryAllocator.AllocateTensor<float>(tensorInfo.Input0, true);
+        temporaries.Add(inputTensor);
+
+        // Process the image to tensor
+        preprocess.ProcessImageToTensor(resized, inputTensor.Tensor, padding);
+
+        // Create ort values
+        var ortInput = CreateOrtValue(inputTensor.Tensor.Buffer, tensorInfo.Input0.Dimensions64);
+        temporaries.Add(ortInput);
+
+        // Bind input to ort io binding
+        binding.BindInput(session.InputNames[0], ortInput);
+    }
+
+    /// <summary>
+    /// Creates a copy of the image resized to the model input size.
+    /// </summary>
+    /// <param name="image">Image to resize; it is not modified.</param>
+    /// <param name="padding">Half of the space left between the resized image and the model input size, per axis.</param>
+    /// <returns>The resized copy; the caller disposes it.</returns>
+    private Image<Rgb24> ResizeImage(Image<Rgb24> image, out Vector<int> padding)
+    {
+        // Get the model image input size
+        var inputSize = metadata.ImageSize;
+
+        // Create resize options
+        var options = new ResizeOptions()
+        {
+            Size = inputSize,
+
+            // Select resize mode according to 'keepAspectRatio'
+            Mode = configuration.KeepAspectRatio
+                ? ResizeMode.Max
+                : ResizeMode.Stretch,
+
+            // Select faster resampling algorithm
+            Sampler = KnownResamplers.NearestNeighbor
+        };
+
+        // Create resized image
+        var resized = image.Clone(x => x.Resize(options));
+
+        // Calculate padding
+        padding = new Vector<int>(
+            (inputSize.Width - resized.Size.Width) / 2,
+            (inputSize.Height - resized.Size.Height) / 2
+        );
+
+        // Return the resized image
+        return resized;
+    }
+
+    #endregion
+
+    /// <summary>
+    /// Wraps a managed buffer in an ort tensor value without copying it.
+    /// </summary>
+    private static OrtValue CreateOrtValue(Memory<float> buffer, long[] shape)
+    {
+        return OrtValue.CreateTensorValueFromMemory(OrtMemoryInfo.DefaultInstance, buffer, shape);
+    }
+}
\ No newline at end of file
diff --git a/Source/YoloV8/Utilities/ImageContoursDetector.cs b/Source/YoloV8/Utilities/ImageContoursDetector.cs index 9e077ff..62b97c4 100644 --- a/Source/YoloV8/Utilities/ImageContoursDetector.cs +++ b/Source/YoloV8/Utilities/ImageContoursDetector.cs @@ -2,12 +2,12 @@ internal static class ImageContoursDetector { - private static readonly (Func func, int neighborhood)[] _neighborhood; + private static readonly (Func Func, int Neighborhood)[] _neighborhood; static ImageContoursDetector() { - _neighborhood = new (Func, int)[] - { + _neighborhood = + [ (point => new Point(point.X - 1, point.Y), 7), (point => new Point(point.X - 1, point.Y - 1), 7), (point => new Point(point.X, point.Y - 1), 1), @@ -16,23 +16,21 @@ static ImageContoursDetector() (point => new Point(point.X + 1, point.Y + 1), 3), (point => new Point(point.X, point.Y+1), 5), (point => new Point(point.X -1, point.Y + 1), 5) - }; + ]; } - public static IReadOnlyList> FindContours(this Image image) + public static Point[][] FindContours(this Image image) { var luminance = image.CloneAs(); - var found = new HashSet(); + var inside = false; + var contours = new List(); - bool inside = false; - - var contours = new List>(); - - for (int y = 0; y < luminance.Height; y++) - for (int x = 0; x < luminance.Width; x++) + for (var y = 0; y < luminance.Height; y++) + { + for (var x = 0; x <
luminance.Width; x++) { - Point point = new(x, y); + var point = new Point(x, y); if (found.Contains(point) && !inside) { @@ -40,10 +38,12 @@ public static IReadOnlyList> FindContours(this Image image) continue; } - bool transparent = IsTransparent(luminance, point); + var transparent = IsTransparent(luminance, point); if (!transparent && inside) + { continue; + } if (transparent && inside) { @@ -55,22 +55,19 @@ public static IReadOnlyList> FindContours(this Image image) { var contour = new List(); - contours.Add(contour); - found.Add(point); contour.Add(point); - int checkLocationNr = 1; - Point startPos = point; + var checkLocationNr = 1; + var startPos = point; - int counter1 = 0; - int counter2 = 0; + var counter1 = 0; + var counter2 = 0; while (true) { - Point checkPosition = _neighborhood[checkLocationNr - 1].func(point); - - int newCheckLocationNr = _neighborhood[checkLocationNr - 1].neighborhood; + var checkPosition = _neighborhood[checkLocationNr - 1].Func(point); + var newCheckLocationNr = _neighborhood[checkLocationNr - 1].Neighborhood; if (!IsTransparent(luminance, checkPosition)) { @@ -96,15 +93,22 @@ public static IReadOnlyList> FindContours(this Image image) checkLocationNr = 1 + (checkLocationNr % 8); if (counter2 > 8) + { break; + } else + { counter2++; + } } } + + contours.Add([.. contour]); } } + } - return contours; + return [.. 
contours]; } private static bool IsTransparent(Image image, Point pixel) diff --git a/Source/YoloV8/Utilities/NonMaxSuppressionHelper.cs b/Source/YoloV8/Utilities/NonMaxSuppressionHelper.cs deleted file mode 100644 index cc3e843..0000000 --- a/Source/YoloV8/Utilities/NonMaxSuppressionHelper.cs +++ /dev/null @@ -1,96 +0,0 @@ -namespace Compunet.YoloV8.Utilities; - -internal static class NonMaxSuppressionHelper -{ - private static readonly ArrayPool _arrayPool = ArrayPool.Create(); - - public static IndexedBoundingBox[] Suppress(IndexedBoundingBox[] boxes, float iouThreshold) - { - Array.Sort(boxes); - - var boxCount = boxes.Length; - - var activeCount = boxCount; - - var isNotActiveBoxes = _arrayPool.Rent(boxCount); - - try - { - var selected = new List(); - - for (int i = boxCount - 1; i >= 0; i--) - { - if (isNotActiveBoxes[i]) - { - continue; - } - - var boxA = boxes[i]; - - selected.Add(boxA); - - for (var j = i; j >= 0; j--) - { - if (isNotActiveBoxes[j]) - { - continue; - } - - var boxB = boxes[j]; - - if (boxA.Class == boxB.Class) - { - if (CalculateIoU(boxA.Bounds, boxB.Bounds) > iouThreshold) - { - isNotActiveBoxes[j] = true; - - activeCount--; - - if (activeCount <= 0) - { - break; - } - } - } - } - - if (activeCount <= 0) - { - break; - } - } - - return [.. 
selected]; - } - finally - { - _arrayPool.Return(isNotActiveBoxes, true); - } - } - - private static float CalculateIoU(Rectangle rectA, Rectangle rectB) - { - var areaA = Area(rectA); - - if (areaA <= 0f) - { - return 0f; - } - - var areaB = Area(rectB); - - if (areaB <= 0f) - { - return 0f; - } - - var intersectionArea = Area(Rectangle.Intersect(rectA, rectB)); - - return (float)intersectionArea / (areaA + areaB - intersectionArea); - } - - private static int Area(Rectangle rectangle) - { - return rectangle.Width * rectangle.Height; - } -} \ No newline at end of file diff --git a/Source/YoloV8/Utilities/ObbNonMaxSuppressionHelper.cs b/Source/YoloV8/Utilities/ObbNonMaxSuppressionHelper.cs deleted file mode 100644 index d69a0c5..0000000 --- a/Source/YoloV8/Utilities/ObbNonMaxSuppressionHelper.cs +++ /dev/null @@ -1,111 +0,0 @@ -namespace Compunet.YoloV8.Utilities; - -internal static class ObbNonMaxSuppressionHelper -{ - private static readonly ArrayPool _arrayPool = ArrayPool.Create(); - - public static ObbIndexedBoundingBox[] Suppress(ObbIndexedBoundingBox[] boxes, float iouThreshold) - { - Array.Sort(boxes); - - var boxCount = boxes.Length; - - var activeCount = boxCount; - - var isNotActiveBoxes = _arrayPool.Rent(boxCount); - - try - { - var selected = new List(); - - for (int i = 0; i < boxCount; i++) - { - if (isNotActiveBoxes[i]) - { - continue; - } - - var boxA = boxes[i]; - - selected.Add(boxA); - - for (var j = i + 1; j < boxCount; j++) - { - if (isNotActiveBoxes[j]) - { - continue; - } - - var boxB = boxes[j]; - - if (CalculateIoU(boxA, boxB) > iouThreshold) - { - isNotActiveBoxes[j] = true; - - activeCount--; - - if (activeCount <= 0) - { - break; - } - } - } - - if (activeCount <= 0) - { - break; - } - } - - return [.. 
selected]; - } - finally - { - _arrayPool.Return(isNotActiveBoxes, true); - } - } - - private static double CalculateIoU(this ObbIndexedBoundingBox box1, ObbIndexedBoundingBox box2) - { - var areaA = Area(box1); - - if (areaA <= 0f) - { - return 0f; - } - - var areaB = Area(box2); - - if (areaB <= 0f) - { - return 0f; - } - - var vertices1 = box1.GetCornerPoints(); - var vertices2 = box2.GetCornerPoints(); - - var rect1 = new Path64(vertices1.Select(v => new Point64(v.X, v.Y))); - var rect2 = new Path64(vertices2.Select(v => new Point64(v.X, v.Y))); - - var subject = new Paths64([rect1]); - var clip = new Paths64([rect2]); - - var intersection = Clipper.Intersect(subject, clip, FillRule.EvenOdd); - var union = Clipper.Union(subject, clip, FillRule.EvenOdd); - - if (intersection.Count == 0 || union.Count == 0) - { - return 0f; - } - - var intersectionArea = Clipper.Area(intersection[0]); - var unionArea = Clipper.Area(union[0]); - - return intersectionArea / unionArea; - } - - private static int Area(ObbIndexedBoundingBox obb) - { - return obb.Bounds.Width * obb.Bounds.Height; - } -} \ No newline at end of file diff --git a/Source/YoloV8/Utilities/PreprocessHelper.cs b/Source/YoloV8/Utilities/PreprocessHelper.cs deleted file mode 100644 index a36784e..0000000 --- a/Source/YoloV8/Utilities/PreprocessHelper.cs +++ /dev/null @@ -1,67 +0,0 @@ -namespace Compunet.YoloV8.Utilities; - -internal static class PreprocessHelper -{ - public static void ProcessToTensor(Image image, Size modelSize, bool originalAspectRatio, DenseTensor target, int batch) - { - var options = new ResizeOptions() - { - Size = modelSize, - Mode = originalAspectRatio ? 
ResizeMode.Max : ResizeMode.Stretch, - }; - - image.Mutate(x => x.Resize(options)); - - var xPadding = (modelSize.Width - image.Width) / 2; - var yPadding = (modelSize.Height - image.Height) / 2; - - var width = image.Width; - var height = image.Height; - - // Pre-calculate strides for performance - var strideBatchR = target.Strides[0] * batch + target.Strides[1] * 0; - var strideBatchG = target.Strides[0] * batch + target.Strides[1] * 1; - var strideBatchB = target.Strides[0] * batch + target.Strides[1] * 2; - var strideY = target.Strides[2]; - var strideX = target.Strides[3]; - - // Get a span of the whole tensor for fast access - var tensorSpan = target.Buffer; - - // Try get continuous memory block of the entire image data - if (image.DangerousTryGetSinglePixelMemory(out var memory)) - { - Parallel.For(0, width * height, index => - { - int x = index % width; - int y = index / width; - int tensorIndex = strideBatchR + strideY * (y + yPadding) + strideX * (x + xPadding); - - var pixel = memory.Span[index]; - WritePixel(tensorSpan.Span, tensorIndex, pixel, strideBatchR, strideBatchG, strideBatchB); - }); - } - else - { - Parallel.For(0, height, y => - { - var rowSpan = image.DangerousGetPixelRowMemory(y).Span; - int tensorYIndex = strideBatchR + strideY * (y + yPadding); - - for (int x = 0; x < width; x++) - { - int tensorIndex = tensorYIndex + strideX * (x + xPadding); - var pixel = rowSpan[x]; - WritePixel(tensorSpan.Span, tensorIndex, pixel, strideBatchR, strideBatchG, strideBatchB); - } - }); - } - } - - private static void WritePixel(Span tensorSpan, int tensorIndex, Rgb24 pixel, int strideBatchR, int strideBatchG, int strideBatchB) - { - tensorSpan[tensorIndex] = pixel.R / 255f; - tensorSpan[tensorIndex + strideBatchG - strideBatchR] = pixel.G / 255f; - tensorSpan[tensorIndex + strideBatchB - strideBatchR] = pixel.B / 255f; - } -} \ No newline at end of file diff --git a/Source/YoloV8/Utilities/Vector.cs b/Source/YoloV8/Utilities/Vector.cs new file mode 
100644
index 0000000..e9d05cd
--- /dev/null
+++ b/Source/YoloV8/Utilities/Vector.cs
@@ -0,0 +1,22 @@
+namespace Compunet.YoloV8.Utilities;
+
+/// <summary>
+/// Immutable pair of X and Y components of the same type.
+/// </summary>
+/// <typeparam name="T">Type of both components.</typeparam>
+internal readonly struct Vector<T>(T x, T y)
+{
+    /// <summary>
+    /// Vector whose components both have the default value of <typeparamref name="T"/>.
+    /// </summary>
+    public static readonly Vector<T> Default = new();
+
+    /// <summary>First component.</summary>
+    public T X => x;
+
+    /// <summary>Second component.</summary>
+    public T Y => y;
+
+    /// <summary>Formats the vector as "X = ..., Y = ...".</summary>
+    public override string ToString() => $"X = {x}, Y = {y}";
+}
\ No newline at end of file
diff --git a/Source/YoloV8/YoloV8.csproj b/Source/YoloV8/YoloV8.csproj index 037ef9f..d611609 100644 --- a/Source/YoloV8/YoloV8.csproj +++ b/Source/YoloV8/YoloV8.csproj @@ -22,10 +22,10 @@ true True Use YOLOv8 in real-time for object detection, instance segmentation, pose estimation and image classification, via ONNX Runtime - https://github.com/dme-compunet/YOLOv8 + https://github.com/dme-compunet/YoloV8 AGPL-3.0-or-later README.md - image-classification object-detection pose-estimation instance-segmentation onnx imagesharp onnx-runtime ultralytics yolov8 + image-classification object-detection pose-estimation instance-segmentation onnx imagesharp onnx-runtime ultralytics yolov8 yolov10 Icon.png Compunet 4.2.0 @@ -40,6 +40,7 @@ +