From f43721ec928dcdbd5df246efebd816f92abb2359 Mon Sep 17 00:00:00 2001 From: Mert <101130780+mertalev@users.noreply.github.com> Date: Wed, 10 Jul 2024 10:20:43 -0400 Subject: [PATCH] fix(ml): armnn not being used (#10929) * fix armnn not being used, move fallback handling to main, add tests * formatting --- machine-learning/app/conftest.py | 6 ++ machine-learning/app/main.py | 13 ++- machine-learning/app/models/base.py | 23 +++-- machine-learning/app/models/clip/textual.py | 3 +- .../models/facial_recognition/detection.py | 11 +-- .../models/facial_recognition/recognition.py | 14 +-- machine-learning/app/test_main.py | 85 ++++++++++++++++--- 7 files changed, 111 insertions(+), 44 deletions(-) diff --git a/machine-learning/app/conftest.py b/machine-learning/app/conftest.py index 9548ad01482a6..6f5d5ea7058bf 100644 --- a/machine-learning/app/conftest.py +++ b/machine-learning/app/conftest.py @@ -168,6 +168,12 @@ def warning() -> Iterator[mock.Mock]: yield mocked +@pytest.fixture(scope="function") +def exception() -> Iterator[mock.Mock]: + with mock.patch.object(log, "exception") as mocked: + yield mocked + + @pytest.fixture(scope="function") def snapshot_download() -> Iterator[mock.Mock]: with mock.patch("app.models.base.snapshot_download") as mocked: diff --git a/machine-learning/app/main.py b/machine-learning/app/main.py index ac493a059a013..000119937e74a 100644 --- a/machine-learning/app/main.py +++ b/machine-learning/app/main.py @@ -29,6 +29,7 @@ InferenceEntry, InferenceResponse, MessageResponse, + ModelFormat, ModelIdentity, ModelTask, ModelType, @@ -195,7 +196,17 @@ def _load(model: InferenceModel) -> InferenceModel: if model.load_attempts > 1: raise HTTPException(500, f"Failed to load model '{model.model_name}'") with lock: - model.load() + try: + model.load() + except FileNotFoundError as e: + if model.model_format == ModelFormat.ONNX: + raise e + log.exception(e) + log.warning( + f"{model.model_format.upper()} is available, but model '{model.model_name}' does not support it." 
+ ) + model.model_format = ModelFormat.ONNX + model.load() return model try: diff --git a/machine-learning/app/models/base.py b/machine-learning/app/models/base.py index 8f115c77f131c..1c019969b4790 100644 --- a/machine-learning/app/models/base.py +++ b/machine-learning/app/models/base.py @@ -23,7 +23,7 @@ def __init__( self, model_name: str, cache_dir: Path | str | None = None, - preferred_format: ModelFormat | None = None, + model_format: ModelFormat | None = None, session: ModelSession | None = None, **model_kwargs: Any, ) -> None: @@ -31,7 +31,7 @@ def __init__( self.load_attempts = 0 self.model_name = clean_name(model_name) self.cache_dir = Path(cache_dir) if cache_dir is not None else self._cache_dir_default - self.model_format = preferred_format if preferred_format is not None else self._model_format_default + self.model_format = model_format if model_format is not None else self._model_format_default if session is not None: self.session = session @@ -48,7 +48,7 @@ def load(self) -> None: self.load_attempts += 1 self.download() - attempt = f"Attempt #{self.load_attempts + 1} to load" if self.load_attempts else "Loading" + attempt = f"Attempt #{self.load_attempts} to load" if self.load_attempts > 1 else "Loading" log.info(f"{attempt} {self.model_type.replace('-', ' ')} model '{self.model_name}' to memory") self.session = self._load() self.loaded = True @@ -101,6 +101,9 @@ def clear_cache(self) -> None: self.cache_dir.mkdir(parents=True, exist_ok=True) def _make_session(self, model_path: Path) -> ModelSession: + if not model_path.is_file(): + raise FileNotFoundError(f"Model file not found: {model_path}") + match model_path.suffix: case ".armnn": session: ModelSession = AnnSession(model_path) @@ -144,17 +147,13 @@ def cached(self) -> bool: @property def model_format(self) -> ModelFormat: - return self._preferred_format + return self._model_format @model_format.setter - def model_format(self, preferred_format: ModelFormat) -> None: - log.debug(f"Setting preferred format to {preferred_format}") - self._preferred_format = preferred_format + def model_format(self, model_format: ModelFormat) -> None: + log.debug(f"Setting model format to {model_format}") + self._model_format = model_format @property def _model_format_default(self) -> ModelFormat: - prefer_ann = ann.ann.is_available and settings.ann - ann_exists = (self.model_dir / "model.armnn").is_file() - if prefer_ann and not ann_exists: - log.warning(f"ARM NN is available, but '{self.model_name}' does not support ARM NN. 
Falling back to ONNX.") - return ModelFormat.ARMNN if prefer_ann and ann_exists else ModelFormat.ONNX + return ModelFormat.ARMNN if ann.ann.is_available and settings.ann else ModelFormat.ONNX diff --git a/machine-learning/app/models/clip/textual.py b/machine-learning/app/models/clip/textual.py index 4d86bf05b01cc..7a25c2f4ad5bf 100644 --- a/machine-learning/app/models/clip/textual.py +++ b/machine-learning/app/models/clip/textual.py @@ -22,11 +22,12 @@ def _predict(self, inputs: str, **kwargs: Any) -> NDArray[np.float32]: return res def _load(self) -> ModelSession: + session = super()._load() log.debug(f"Loading tokenizer for CLIP model '{self.model_name}'") self.tokenizer = self._load_tokenizer() log.debug(f"Loaded tokenizer for CLIP model '{self.model_name}'") - return super()._load() + return session @abstractmethod def _load_tokenizer(self) -> Tokenizer: diff --git a/machine-learning/app/models/facial_recognition/detection.py b/machine-learning/app/models/facial_recognition/detection.py index 2efacd447eb71..fdbcafffb588d 100644 --- a/machine-learning/app/models/facial_recognition/detection.py +++ b/machine-learning/app/models/facial_recognition/detection.py @@ -1,4 +1,3 @@ -from pathlib import Path from typing import Any import numpy as np @@ -14,15 +13,9 @@ class FaceDetector(InferenceModel): depends = [] identity = (ModelType.DETECTION, ModelTask.FACIAL_RECOGNITION) - def __init__( - self, - model_name: str, - min_score: float = 0.7, - cache_dir: Path | str | None = None, - **model_kwargs: Any, - ) -> None: + def __init__(self, model_name: str, min_score: float = 0.7, **model_kwargs: Any) -> None: self.min_score = model_kwargs.pop("minScore", min_score) - super().__init__(model_name, cache_dir, **model_kwargs) + super().__init__(model_name, **model_kwargs) def _load(self) -> ModelSession: session = self._make_session(self.model_path) diff --git a/machine-learning/app/models/facial_recognition/recognition.py b/machine-learning/app/models/facial_recognition/recognition.py index 24ce816385c25..d9ceb12b6d590 100644 --- a/machine-learning/app/models/facial_recognition/recognition.py +++ b/machine-learning/app/models/facial_recognition/recognition.py @@ -9,7 +9,7 @@ from onnx.tools.update_model_dims import update_inputs_outputs_dims from PIL import Image -from app.config import clean_name, log +from app.config import log from app.models.base import InferenceModel from app.models.transforms import decode_cv2 from app.schemas import FaceDetectionOutput, FacialRecognitionOutput, ModelFormat, ModelSession, ModelTask, ModelType @@ -20,20 +20,14 @@ class FaceRecognizer(InferenceModel): depends = [(ModelType.DETECTION, ModelTask.FACIAL_RECOGNITION)] identity = (ModelType.RECOGNITION, ModelTask.FACIAL_RECOGNITION) - def __init__( - self, - model_name: str, - min_score: float = 0.7, - cache_dir: Path | str | None = None, - **model_kwargs: Any, - ) -> None: - super().__init__(clean_name(model_name), cache_dir, **model_kwargs) + def __init__(self, model_name: str, min_score: float = 0.7, **model_kwargs: Any) -> None: + super().__init__(model_name, **model_kwargs) self.min_score = model_kwargs.pop("minScore", min_score) self.batch = self.model_format == ModelFormat.ONNX def _load(self) -> ModelSession: session = self._make_session(self.model_path) - if self.model_format == ModelFormat.ONNX and not has_batch_axis(session): + if self.batch and not has_batch_axis(session): self._add_batch_axis(self.model_path) session = self._make_session(self.model_path) self.model = ArcFaceONNX( diff --git 
a/machine-learning/app/test_main.py b/machine-learning/app/test_main.py index a424c8236a4db..2c81421e52a9d 100644 --- a/machine-learning/app/test_main.py +++ b/machine-learning/app/test_main.py @@ -43,7 +43,7 @@ def test_sets_cache_dir_kwarg(self) -> None: assert encoder.cache_dir == cache_dir - def test_sets_default_preferred_format(self, mocker: MockerFixture) -> None: + def test_sets_default_model_format(self, mocker: MockerFixture) -> None: mocker.patch.object(settings, "ann", True) mocker.patch("ann.ann.is_available", False) @@ -51,7 +51,7 @@ def test_sets_default_preferred_format(self, mocker: MockerFixture) -> None: assert encoder.model_format == ModelFormat.ONNX - def test_sets_default_preferred_format_to_armnn_if_available(self, path: mock.Mock, mocker: MockerFixture) -> None: + def test_sets_default_model_format_to_armnn_if_available(self, path: mock.Mock, mocker: MockerFixture) -> None: mocker.patch.object(settings, "ann", True) mocker.patch("ann.ann.is_available", True) path.suffix = ".armnn" @@ -60,11 +60,11 @@ def test_sets_default_preferred_format_to_armnn_if_available(self, path: mock.Mo assert encoder.model_format == ModelFormat.ARMNN - def test_sets_preferred_format_kwarg(self, mocker: MockerFixture) -> None: + def test_sets_model_format_kwarg(self, mocker: MockerFixture) -> None: mocker.patch.object(settings, "ann", False) mocker.patch("ann.ann.is_available", False) - encoder = OpenClipTextualEncoder("ViT-B-32__openai", preferred_format=ModelFormat.ARMNN) + encoder = OpenClipTextualEncoder("ViT-B-32__openai", model_format=ModelFormat.ARMNN) assert encoder.model_format == ModelFormat.ARMNN @@ -129,7 +129,7 @@ def test_download(self, snapshot_download: mock.Mock) -> None: ) def test_download_downloads_armnn_if_preferred_format(self, snapshot_download: mock.Mock) -> None: - encoder = OpenClipTextualEncoder("ViT-B-32__openai", preferred_format=ModelFormat.ARMNN) + encoder = OpenClipTextualEncoder("ViT-B-32__openai", model_format=ModelFormat.ARMNN) encoder.download() snapshot_download.assert_called_once_with( @@ -140,6 +140,19 @@ def test_download_downloads_armnn_if_preferred_format(self, snapshot_download: m ignore_patterns=[], ) + def test_throws_exception_if_model_path_does_not_exist( + self, snapshot_download: mock.Mock, ort_session: mock.Mock, path: mock.Mock + ) -> None: + path.return_value.__truediv__.return_value.__truediv__.return_value.is_file.return_value = False + + encoder = OpenClipTextualEncoder("ViT-B-32__openai", cache_dir=path) + + with pytest.raises(FileNotFoundError): + encoder.load() + + snapshot_download.assert_called_once() + ort_session.assert_not_called() + @pytest.mark.usefixtures("ort_session") class TestOrtSession: @@ -467,16 +480,18 @@ def test_recognition(self, cv_image: cv2.Mat, mocker: MockerFixture) -> None: assert isinstance(call_args[0][0], np.ndarray) assert call_args[0][0].shape == (112, 112, 3) - def test_recognition_adds_batch_axis_for_ort(self, ort_session: mock.Mock, mocker: MockerFixture) -> None: + def test_recognition_adds_batch_axis_for_ort( + self, ort_session: mock.Mock, path: mock.Mock, mocker: MockerFixture + ) -> None: onnx = mocker.patch("app.models.facial_recognition.recognition.onnx", autospec=True) update_dims = mocker.patch( "app.models.facial_recognition.recognition.update_inputs_outputs_dims", autospec=True ) mocker.patch("app.models.base.InferenceModel.download") mocker.patch("app.models.facial_recognition.recognition.ArcFaceONNX") - ort_session.return_value.get_inputs.return_value = [SimpleNamespace(name="input.1", 
shape=(1, 3, 224, 224))] ort_session.return_value.get_outputs.return_value = [SimpleNamespace(name="output.1", shape=(1, 800))] + path.return_value.__truediv__.return_value.__truediv__.return_value.suffix = ".onnx" proto = mock.Mock() @@ -492,27 +507,30 @@ def test_recognition_adds_batch_axis_for_ort(self, ort_session: mock.Mock, mocke onnx.load.return_value = proto - face_recognizer = FaceRecognizer("buffalo_s") + face_recognizer = FaceRecognizer("buffalo_s", cache_dir=path) face_recognizer.load() assert face_recognizer.batch is True update_dims.assert_called_once_with(proto, {"input.1": ["batch", 3, 224, 224]}, {"output.1": ["batch", 800]}) onnx.save.assert_called_once_with(update_dims.return_value, face_recognizer.model_path) - def test_recognition_does_not_add_batch_axis_if_exists(self, ort_session: mock.Mock, mocker: MockerFixture) -> None: + def test_recognition_does_not_add_batch_axis_if_exists( + self, ort_session: mock.Mock, path: mock.Mock, mocker: MockerFixture + ) -> None: onnx = mocker.patch("app.models.facial_recognition.recognition.onnx", autospec=True) update_dims = mocker.patch( "app.models.facial_recognition.recognition.update_inputs_outputs_dims", autospec=True ) mocker.patch("app.models.base.InferenceModel.download") mocker.patch("app.models.facial_recognition.recognition.ArcFaceONNX") + path.return_value.__truediv__.return_value.__truediv__.return_value.suffix = ".onnx" inputs = [SimpleNamespace(name="input.1", shape=("batch", 3, 224, 224))] outputs = [SimpleNamespace(name="output.1", shape=("batch", 800))] ort_session.return_value.get_inputs.return_value = inputs ort_session.return_value.get_outputs.return_value = outputs - face_recognizer = FaceRecognizer("buffalo_s") + face_recognizer = FaceRecognizer("buffalo_s", cache_dir=path) face_recognizer.load() assert face_recognizer.batch is True @@ -520,6 +538,30 @@ def test_recognition_does_not_add_batch_axis_if_exists(self, ort_session: mock.M onnx.load.assert_not_called() onnx.save.assert_not_called() + def test_recognition_does_not_add_batch_axis_for_armnn( + self, ann_session: mock.Mock, path: mock.Mock, mocker: MockerFixture + ) -> None: + onnx = mocker.patch("app.models.facial_recognition.recognition.onnx", autospec=True) + update_dims = mocker.patch( + "app.models.facial_recognition.recognition.update_inputs_outputs_dims", autospec=True + ) + mocker.patch("app.models.base.InferenceModel.download") + mocker.patch("app.models.facial_recognition.recognition.ArcFaceONNX") + path.return_value.__truediv__.return_value.__truediv__.return_value.suffix = ".armnn" + + inputs = [SimpleNamespace(name="input.1", shape=("batch", 3, 224, 224))] + outputs = [SimpleNamespace(name="output.1", shape=("batch", 800))] + ann_session.return_value.get_inputs.return_value = inputs + ann_session.return_value.get_outputs.return_value = outputs + + face_recognizer = FaceRecognizer("buffalo_s", model_format=ModelFormat.ARMNN, cache_dir=path) + face_recognizer.load() + + assert face_recognizer.batch is False + update_dims.assert_not_called() + onnx.load.assert_not_called() + onnx.save.assert_not_called() + @pytest.mark.asyncio class TestCache: @@ -693,7 +735,7 @@ async def test_load_clears_cache_and_retries_if_os_error(self) -> None: mock_model.clear_cache.assert_called_once() assert mock_model.load.call_count == 2 - async def test_load_clears_cache_and_raises_if_os_error_and_already_retried(self) -> None: + async def test_load_raises_if_os_error_and_already_retried(self) -> None: mock_model = mock.Mock(spec=InferenceModel) 
mock_model.model_name = "test_model_name" mock_model.model_type = ModelType.VISUAL @@ -707,6 +749,27 @@ async def test_load_clears_cache_and_raises_if_os_error_and_already_retried(self mock_model.clear_cache.assert_not_called() mock_model.load.assert_not_called() + async def test_falls_back_to_onnx_if_other_format_does_not_exist( + self, exception: mock.Mock, warning: mock.Mock + ) -> None: + mock_model = mock.Mock(spec=InferenceModel) + mock_model.model_name = "test_model_name" + mock_model.model_type = ModelType.VISUAL + mock_model.model_task = ModelTask.SEARCH + mock_model.model_format = ModelFormat.ARMNN + mock_model.loaded = False + mock_model.load_attempts = 0 + error = FileNotFoundError() + mock_model.load.side_effect = [error, None] + + await load(mock_model) + + mock_model.clear_cache.assert_not_called() + assert mock_model.load.call_count == 2 + exception.assert_called_once_with(error) + warning.assert_called_once_with("ARMNN is available, but model 'test_model_name' does not support it.") + mock_model.model_format = ModelFormat.ONNX + @pytest.mark.skipif( not settings.test_full,