From f43721ec928dcdbd5df246efebd816f92abb2359 Mon Sep 17 00:00:00 2001 From: Mert <101130780+mertalev@users.noreply.github.com> Date: Wed, 10 Jul 2024 10:20:43 -0400 Subject: [PATCH] fix(ml): armnn not being used (#10929) * fix armnn not being used, move fallback handling to main, add tests * formatting --- machine-learning/app/conftest.py | 6 ++ machine-learning/app/main.py | 13 ++- machine-learning/app/models/base.py | 23 +++-- machine-learning/app/models/clip/textual.py | 3 +- .../models/facial_recognition/detection.py | 11 +-- .../models/facial_recognition/recognition.py | 14 +-- machine-learning/app/test_main.py | 85 ++++++++++++++++--- 7 files changed, 111 insertions(+), 44 deletions(-) diff --git a/machine-learning/app/conftest.py b/machine-learning/app/conftest.py index 9548ad01482a6..6f5d5ea7058bf 100644 --- a/machine-learning/app/conftest.py +++ b/machine-learning/app/conftest.py @@ -168,6 +168,12 @@ def warning() -> Iterator[mock.Mock]: yield mocked +@pytest.fixture(scope="function") +def exception() -> Iterator[mock.Mock]: + with mock.patch.object(log, "exception") as mocked: + yield mocked + + @pytest.fixture(scope="function") def snapshot_download() -> Iterator[mock.Mock]: with mock.patch("app.models.base.snapshot_download") as mocked: diff --git a/machine-learning/app/main.py b/machine-learning/app/main.py index ac493a059a013..000119937e74a 100644 --- a/machine-learning/app/main.py +++ b/machine-learning/app/main.py @@ -29,6 +29,7 @@ InferenceEntry, InferenceResponse, MessageResponse, + ModelFormat, ModelIdentity, ModelTask, ModelType, @@ -195,7 +196,17 @@ def _load(model: InferenceModel) -> InferenceModel: if model.load_attempts > 1: raise HTTPException(500, f"Failed to load model '{model.model_name}'") with lock: - model.load() + try: + model.load() + except FileNotFoundError as e: + if model.model_format == ModelFormat.ONNX: + raise e + log.exception(e) + log.warning( + f"{model.model_format.upper()} is available, but model '{model.model_name}' does not support it." 
+ ) + model.model_format = ModelFormat.ONNX + model.load() return model try: diff --git a/machine-learning/app/models/base.py b/machine-learning/app/models/base.py index 8f115c77f131c..1c019969b4790 100644 --- a/machine-learning/app/models/base.py +++ b/machine-learning/app/models/base.py @@ -23,7 +23,7 @@ def __init__( self, model_name: str, cache_dir: Path | str | None = None, - preferred_format: ModelFormat | None = None, + model_format: ModelFormat | None = None, session: ModelSession | None = None, **model_kwargs: Any, ) -> None: @@ -31,7 +31,7 @@ def __init__( self.load_attempts = 0 self.model_name = clean_name(model_name) self.cache_dir = Path(cache_dir) if cache_dir is not None else self._cache_dir_default - self.model_format = preferred_format if preferred_format is not None else self._model_format_default + self.model_format = model_format if model_format is not None else self._model_format_default if session is not None: self.session = session @@ -48,7 +48,7 @@ def load(self) -> None: self.load_attempts += 1 self.download() - attempt = f"Attempt #{self.load_attempts + 1} to load" if self.load_attempts else "Loading" + attempt = f"Attempt #{self.load_attempts} to load" if self.load_attempts > 1 else "Loading" log.info(f"{attempt} {self.model_type.replace('-', ' ')} model '{self.model_name}' to memory") self.session = self._load() self.loaded = True @@ -101,6 +101,9 @@ def clear_cache(self) -> None: self.cache_dir.mkdir(parents=True, exist_ok=True) def _make_session(self, model_path: Path) -> ModelSession: + if not model_path.is_file(): + raise FileNotFoundError(f"Model file not found: {model_path}") + match model_path.suffix: case ".armnn": session: ModelSession = AnnSession(model_path) @@ -144,17 +147,13 @@ def cached(self) -> bool: @property def model_format(self) -> ModelFormat: - return self._preferred_format + return self._model_format @model_format.setter - def model_format(self, preferred_format: ModelFormat) -> None: - log.debug(f"Setting preferred format to {preferred_format}") - self._preferred_format = preferred_format + def model_format(self, model_format: ModelFormat) -> None: + log.debug(f"Setting model format to {model_format}") + self._model_format = model_format @property def _model_format_default(self) -> ModelFormat: - prefer_ann = ann.ann.is_available and settings.ann - ann_exists = (self.model_dir / "model.armnn").is_file() - if prefer_ann and not ann_exists: - log.warning(f"ARM NN is available, but '{self.model_name}' does not support ARM NN. 
Falling back to ONNX.") - return ModelFormat.ARMNN if prefer_ann and ann_exists else ModelFormat.ONNX + return ModelFormat.ARMNN if ann.ann.is_available and settings.ann else ModelFormat.ONNX diff --git a/machine-learning/app/models/clip/textual.py b/machine-learning/app/models/clip/textual.py index 4d86bf05b01cc..7a25c2f4ad5bf 100644 --- a/machine-learning/app/models/clip/textual.py +++ b/machine-learning/app/models/clip/textual.py @@ -22,11 +22,12 @@ def _predict(self, inputs: str, **kwargs: Any) -> NDArray[np.float32]: return res def _load(self) -> ModelSession: + session = super()._load() log.debug(f"Loading tokenizer for CLIP model '{self.model_name}'") self.tokenizer = self._load_tokenizer() log.debug(f"Loaded tokenizer for CLIP model '{self.model_name}'") - return super()._load() + return session @abstractmethod def _load_tokenizer(self) -> Tokenizer: diff --git a/machine-learning/app/models/facial_recognition/detection.py b/machine-learning/app/models/facial_recognition/detection.py index 2efacd447eb71..fdbcafffb588d 100644 --- a/machine-learning/app/models/facial_recognition/detection.py +++ b/machine-learning/app/models/facial_recognition/detection.py @@ -1,4 +1,3 @@ -from pathlib import Path from typing import Any import numpy as np @@ -14,15 +13,9 @@ class FaceDetector(InferenceModel): depends = [] identity = (ModelType.DETECTION, ModelTask.FACIAL_RECOGNITION) - def __init__( - self, - model_name: str, - min_score: float = 0.7, - cache_dir: Path | str | None = None, - **model_kwargs: Any, - ) -> None: + def __init__(self, model_name: str, min_score: float = 0.7, **model_kwargs: Any) -> None: self.min_score = model_kwargs.pop("minScore", min_score) - super().__init__(model_name, cache_dir, **model_kwargs) + super().__init__(model_name, **model_kwargs) def _load(self) -> ModelSession: session = self._make_session(self.model_path) diff --git a/machine-learning/app/models/facial_recognition/recognition.py b/machine-learning/app/models/facial_recognition/recognition.py index 24ce816385c25..d9ceb12b6d590 100644 --- a/machine-learning/app/models/facial_recognition/recognition.py +++ b/machine-learning/app/models/facial_recognition/recognition.py @@ -9,7 +9,7 @@ from onnx.tools.update_model_dims import update_inputs_outputs_dims from PIL import Image -from app.config import clean_name, log +from app.config import log from app.models.base import InferenceModel from app.models.transforms import decode_cv2 from app.schemas import FaceDetectionOutput, FacialRecognitionOutput, ModelFormat, ModelSession, ModelTask, ModelType @@ -20,20 +20,14 @@ class FaceRecognizer(InferenceModel): depends = [(ModelType.DETECTION, ModelTask.FACIAL_RECOGNITION)] identity = (ModelType.RECOGNITION, ModelTask.FACIAL_RECOGNITION) - def __init__( - self, - model_name: str, - min_score: float = 0.7, - cache_dir: Path | str | None = None, - **model_kwargs: Any, - ) -> None: - super().__init__(clean_name(model_name), cache_dir, **model_kwargs) + def __init__(self, model_name: str, min_score: float = 0.7, **model_kwargs: Any) -> None: + super().__init__(model_name, **model_kwargs) self.min_score = model_kwargs.pop("minScore", min_score) self.batch = self.model_format == ModelFormat.ONNX def _load(self) -> ModelSession: session = self._make_session(self.model_path) - if self.model_format == ModelFormat.ONNX and not has_batch_axis(session): + if self.batch and not has_batch_axis(session): self._add_batch_axis(self.model_path) session = self._make_session(self.model_path) self.model = ArcFaceONNX( diff --git 
a/machine-learning/app/test_main.py b/machine-learning/app/test_main.py index a424c8236a4db..2c81421e52a9d 100644 --- a/machine-learning/app/test_main.py +++ b/machine-learning/app/test_main.py @@ -43,7 +43,7 @@ def test_sets_cache_dir_kwarg(self) -> None: assert encoder.cache_dir == cache_dir - def test_sets_default_preferred_format(self, mocker: MockerFixture) -> None: + def test_sets_default_model_format(self, mocker: MockerFixture) -> None: mocker.patch.object(settings, "ann", True) mocker.patch("ann.ann.is_available", False) @@ -51,7 +51,7 @@ def test_sets_default_preferred_format(self, mocker: MockerFixture) -> None: assert encoder.model_format == ModelFormat.ONNX - def test_sets_default_preferred_format_to_armnn_if_available(self, path: mock.Mock, mocker: MockerFixture) -> None: + def test_sets_default_model_format_to_armnn_if_available(self, path: mock.Mock, mocker: MockerFixture) -> None: mocker.patch.object(settings, "ann", True) mocker.patch("ann.ann.is_available", True) path.suffix = ".armnn" @@ -60,11 +60,11 @@ def test_sets_default_preferred_format_to_armnn_if_available(self, path: mock.Mo assert encoder.model_format == ModelFormat.ARMNN - def test_sets_preferred_format_kwarg(self, mocker: MockerFixture) -> None: + def test_sets_model_format_kwarg(self, mocker: MockerFixture) -> None: mocker.patch.object(settings, "ann", False) mocker.patch("ann.ann.is_available", False) - encoder = OpenClipTextualEncoder("ViT-B-32__openai", preferred_format=ModelFormat.ARMNN) + encoder = OpenClipTextualEncoder("ViT-B-32__openai", model_format=ModelFormat.ARMNN) assert encoder.model_format == ModelFormat.ARMNN @@ -129,7 +129,7 @@ def test_download(self, snapshot_download: mock.Mock) -> None: ) def test_download_downloads_armnn_if_preferred_format(self, snapshot_download: mock.Mock) -> None: - encoder = OpenClipTextualEncoder("ViT-B-32__openai", preferred_format=ModelFormat.ARMNN) + encoder = OpenClipTextualEncoder("ViT-B-32__openai", model_format=ModelFormat.ARMNN) encoder.download() snapshot_download.assert_called_once_with( @@ -140,6 +140,19 @@ def test_download_downloads_armnn_if_preferred_format(self, snapshot_download: m ignore_patterns=[], ) + def test_throws_exception_if_model_path_does_not_exist( + self, snapshot_download: mock.Mock, ort_session: mock.Mock, path: mock.Mock + ) -> None: + path.return_value.__truediv__.return_value.__truediv__.return_value.is_file.return_value = False + + encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=path) + + with pytest.raises(FileNotFoundError): + encoder.load() + + snapshot_download.assert_called_once() + ort_session.assert_not_called() + @pytest.mark.usefixtures("ort_session") class TestOrtSession: @@ -467,16 +480,18 @@ def test_recognition(self, cv_image: cv2.Mat, mocker: MockerFixture) -> None: assert isinstance(call_args[0][0], np.ndarray) assert call_args[0][0].shape == (112, 112, 3) - def test_recognition_adds_batch_axis_for_ort(self, ort_session: mock.Mock, mocker: MockerFixture) -> None: + def test_recognition_adds_batch_axis_for_ort( + self, ort_session: mock.Mock, path: mock.Mock, mocker: MockerFixture + ) -> None: onnx = mocker.patch("app.models.facial_recognition.recognition.onnx", autospec=True) update_dims = mocker.patch( "app.models.facial_recognition.recognition.update_inputs_outputs_dims", autospec=True ) mocker.patch("app.models.base.InferenceModel.download") mocker.patch("app.models.facial_recognition.recognition.ArcFaceONNX") - ort_session.return_value.get_inputs.return_value = [SimpleNamespace(name="input.1", 
shape=(1, 3, 224, 224))] ort_session.return_value.get_outputs.return_value = [SimpleNamespace(name="output.1", shape=(1, 800))] + path.return_value.__truediv__.return_value.__truediv__.return_value.suffix = ".onnx" proto = mock.Mock() @@ -492,27 +507,30 @@ def test_recognition_adds_batch_axis_for_ort(self, ort_session: mock.Mock, mocke onnx.load.return_value = proto - face_recognizer = FaceRecognizer("buffalo_s") + face_recognizer = FaceRecognizer("buffalo_s", cache_dir=path) face_recognizer.load() assert face_recognizer.batch is True update_dims.assert_called_once_with(proto, {"input.1": ["batch", 3, 224, 224]}, {"output.1": ["batch", 800]}) onnx.save.assert_called_once_with(update_dims.return_value, face_recognizer.model_path) - def test_recognition_does_not_add_batch_axis_if_exists(self, ort_session: mock.Mock, mocker: MockerFixture) -> None: + def test_recognition_does_not_add_batch_axis_if_exists( + self, ort_session: mock.Mock, path: mock.Mock, mocker: MockerFixture + ) -> None: onnx = mocker.patch("app.models.facial_recognition.recognition.onnx", autospec=True) update_dims = mocker.patch( "app.models.facial_recognition.recognition.update_inputs_outputs_dims", autospec=True ) mocker.patch("app.models.base.InferenceModel.download") mocker.patch("app.models.facial_recognition.recognition.ArcFaceONNX") + path.return_value.__truediv__.return_value.__truediv__.return_value.suffix = ".onnx" inputs = [SimpleNamespace(name="input.1", shape=("batch", 3, 224, 224))] outputs = [SimpleNamespace(name="output.1", shape=("batch", 800))] ort_session.return_value.get_inputs.return_value = inputs ort_session.return_value.get_outputs.return_value = outputs - face_recognizer = FaceRecognizer("buffalo_s") + face_recognizer = FaceRecognizer("buffalo_s", cache_dir=path) face_recognizer.load() assert face_recognizer.batch is True @@ -520,6 +538,30 @@ def test_recognition_does_not_add_batch_axis_if_exists(self, ort_session: mock.M onnx.load.assert_not_called() onnx.save.assert_not_called() + def test_recognition_does_not_add_batch_axis_for_armnn( + self, ann_session: mock.Mock, path: mock.Mock, mocker: MockerFixture + ) -> None: + onnx = mocker.patch("app.models.facial_recognition.recognition.onnx", autospec=True) + update_dims = mocker.patch( + "app.models.facial_recognition.recognition.update_inputs_outputs_dims", autospec=True + ) + mocker.patch("app.models.base.InferenceModel.download") + mocker.patch("app.models.facial_recognition.recognition.ArcFaceONNX") + path.return_value.__truediv__.return_value.__truediv__.return_value.suffix = ".armnn" + + inputs = [SimpleNamespace(name="input.1", shape=("batch", 3, 224, 224))] + outputs = [SimpleNamespace(name="output.1", shape=("batch", 800))] + ann_session.return_value.get_inputs.return_value = inputs + ann_session.return_value.get_outputs.return_value = outputs + + face_recognizer = FaceRecognizer("buffalo_s", model_format=ModelFormat.ARMNN, cache_dir=path) + face_recognizer.load() + + assert face_recognizer.batch is False + update_dims.assert_not_called() + onnx.load.assert_not_called() + onnx.save.assert_not_called() + @pytest.mark.asyncio class TestCache: @@ -693,7 +735,7 @@ async def test_load_clears_cache_and_retries_if_os_error(self) -> None: mock_model.clear_cache.assert_called_once() assert mock_model.load.call_count == 2 - async def test_load_clears_cache_and_raises_if_os_error_and_already_retried(self) -> None: + async def test_load_raises_if_os_error_and_already_retried(self) -> None: mock_model = mock.Mock(spec=InferenceModel) 
mock_model.model_name = "test_model_name" mock_model.model_type = ModelType.VISUAL @@ -707,6 +749,27 @@ async def test_load_clears_cache_and_raises_if_os_error_and_already_retried(self mock_model.clear_cache.assert_not_called() mock_model.load.assert_not_called() + async def test_falls_back_to_onnx_if_other_format_does_not_exist( + self, exception: mock.Mock, warning: mock.Mock + ) -> None: + mock_model = mock.Mock(spec=InferenceModel) + mock_model.model_name = "test_model_name" + mock_model.model_type = ModelType.VISUAL + mock_model.model_task = ModelTask.SEARCH + mock_model.model_format = ModelFormat.ARMNN + mock_model.loaded = False + mock_model.load_attempts = 0 + error = FileNotFoundError() + mock_model.load.side_effect = [error, None] + + await load(mock_model) + + mock_model.clear_cache.assert_not_called() + assert mock_model.load.call_count == 2 + exception.assert_called_once_with(error) + warning.assert_called_once_with("ARMNN is available, but model 'test_model_name' does not support it.") + mock_model.model_format = ModelFormat.ONNX + @pytest.mark.skipif( not settings.test_full,