From 550c1f01def1d594db549dd543646a9b116e28f8 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 16 Aug 2024 15:20:34 +1000 Subject: [PATCH 01/63] Finish initial commit --- src/marqo/s2_inference/clip_utils.py | 294 ++++++++++++------ .../models/open_clip_model_properties.py | 36 +++ .../processing/custom_clip_utils.py | 6 +- 3 files changed, 230 insertions(+), 106 deletions(-) create mode 100644 src/marqo/s2_inference/models/open_clip_model_properties.py diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 4a7095ed0..38234b191 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -12,20 +12,23 @@ import validators from PIL import Image, UnidentifiedImageError from multilingual_clip import pt_multilingual_clip +from open_clip.pretrained import _pcfg, _slpcfg, _apcfg, _mccfg +from open_clip.transform import image_transform_v2, PreprocessCfg from requests.utils import requote_uri from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize from torchvision.transforms import InterpolationMode +from marqo import marqo_docs from marqo.api.exceptions import InternalError from marqo.s2_inference.configs import ModelCache from marqo.s2_inference.errors import InvalidModelPropertiesError, ImageDownloadError from marqo.s2_inference.logger import get_logger +from marqo.s2_inference.models.open_clip_model_properties import OpenCLIPModelProperties, ImagePreprocessor from marqo.s2_inference.processing.custom_clip_utils import HFTokenizer, download_model from marqo.s2_inference.types import * from marqo.tensor_search.enums import ModelProperties, InferenceParams from marqo.tensor_search.models.private_models import ModelLocation from marqo.tensor_search.telemetry import RequestMetrics -from marqo import marqo_docs logger = get_logger(__name__) @@ -66,7 +69,8 @@ def _get_transform(n_px: int, image_mean: List[float] = None, image_std: List[fl ]) -def format_and_load_CLIP_images(images: List[Union[str, ndarray, ImageType]], image_download_headers: dict) -> List[ImageType]: +def format_and_load_CLIP_images(images: List[Union[str, ndarray, ImageType]], image_download_headers: dict) -> List[ + ImageType]: """takes in a list of strings, arrays or urls and either loads and/or converts to PIL for the clip model @@ -197,6 +201,7 @@ def encode_url(url: str) -> str: """ return requests.utils.requote_uri(url) + def format_and_load_CLIP_image(image: Union[str, ndarray, ImageType, Tensor], image_download_headers: dict) -> Union[ImageType, Tensor]: """standardizes the input to be a PIL image @@ -258,7 +263,8 @@ def _is_image(inputs: Union[str, List[Union[str, ImageType, ndarray]]]) -> bool: # if it is a local file without extension, then raise an error if os.path.isfile(thing): # we could also read the first part of the file and infer - raise UnidentifiedImageError(f"local file [{thing}] extension {extension} does not match allowed file types of {_allowed}") + raise UnidentifiedImageError( + f"local file [{thing}] extension {extension} does not match allowed file types of {_allowed}") else: # if it is not a local file and does not have an extension # check if url @@ -275,13 +281,12 @@ def _is_image(inputs: Union[str, List[Union[str, ImageType, ndarray]]]) -> bool: class CLIP: - """ conveniance class wrapper to make clip work easily for both text and image encoding """ - def __init__(self, model_type: str = "ViT-B/32", device: str = None, embedding_dim: int = None, - truncate: bool = True, **kwargs) -> None: + def 
__init__(self, model_type: str = "ViT-B/32", device: str = None, embedding_dim: int = None, + truncate: bool = True, **kwargs) -> None: self.model_type = model_type @@ -298,7 +303,6 @@ def __init__(self, model_type: str = "ViT-B/32", device: str = None, embedding_ # model_auth gets passed through add_docs and search requests: self.model_auth = kwargs.get(InferenceParams.model_auth, None) - def _download_from_repo(self): """Downloads model from an external repo like s3 and returns the filepath @@ -327,14 +331,14 @@ def _download_from_repo(self): def load(self) -> None: model_location_presence = ModelProperties.model_location in self.model_properties - - path = self.model_properties.get("localpath", None) or self.model_properties.get("url",None) + path = self.model_properties.get("localpath", None) or self.model_properties.get("url", None) if path is None and not model_location_presence: # We must load the model into CPU then transfer it to the desired device, always # The original method to load the openai clip model # https://github.com/openai/CLIP/issues/30 - self.model, self.preprocess = clip.load(self.model_type, device='cpu', jit=False, download_root=ModelCache.clip_cache_path) + self.model, self.preprocess = clip.load(self.model_type, device='cpu', jit=False, + download_root=ModelCache.clip_cache_path) self.model = self.model.to(self.device) self.tokenizer = clip.tokenize else: @@ -367,11 +371,11 @@ def custom_clip_load(self): logger.info(f"The name of the custom clip model is {self.model_name}. We use openai clip load") # We must load the model into CPU then transfer it to the desired device, always - model, preprocess = clip.load(name=self.model_path, device="cpu", jit= self.jit, download_root=ModelCache.clip_cache_path) + model, preprocess = clip.load(name=self.model_path, device="cpu", jit=self.jit, + download_root=ModelCache.clip_cache_path) model = model.to(self.device) return model, preprocess - def _convert_output(self, output): if self.device == 'cpu': return output.numpy() @@ -382,7 +386,7 @@ def _convert_output(self, output): def normalize(outputs): return outputs.norm(dim=-1, keepdim=True) - def encode_text(self, sentence: Union[str, List[str]], normalize = True) -> FloatTensor: + def encode_text(self, sentence: Union[str, List[str]], normalize=True) -> FloatTensor: if self.model is None: self.load() @@ -390,7 +394,7 @@ def encode_text(self, sentence: Union[str, List[str]], normalize = True) -> Floa text = self.tokenizer(sentence, truncate=self.truncate).to(self.device) with torch.no_grad(): - outputs = self.model.encode_text(text) + outputs = self.model.encode_text(text) if normalize: _shape_before = outputs.shape @@ -423,12 +427,12 @@ def _preprocess_images(self, images: Union[str, ImageType, List[Union[str, Image image_input: List[Union[ImageType, Tensor]] = [format_and_load_CLIP_image(images, image_download_headers)] image_input_processed: Tensor = torch.stack([self.preprocess(_img).to(self.device) \ - if not isinstance(_img, torch.Tensor) else _img \ - for _img in image_input]) + if not isinstance(_img, torch.Tensor) else _img \ + for _img in image_input]) return image_input_processed def encode_image(self, images: Union[str, ImageType, List[Union[str, ImageType, Tensor]], Tensor], - normalize = True, image_download_headers: Optional[Dict] = None) -> FloatTensor: + normalize=True, image_download_headers: Optional[Dict] = None) -> FloatTensor: """Encode the input image to a tensor representation. 
Args: @@ -450,7 +454,7 @@ def encode_image(self, images: Union[str, ImageType, List[Union[str, ImageType, return self._convert_output(outputs) def encode(self, inputs: Union[str, ImageType, List[Union[str, ImageType]]], - default: str = 'text', normalize = True, **kwargs) -> FloatTensor: + default: str = 'text', normalize=True, **kwargs) -> FloatTensor: infer = kwargs.pop('infer', True) @@ -475,8 +479,8 @@ def encode(self, inputs: Union[str, ImageType, List[Union[str, ImageType]]], class FP16_CLIP(CLIP): - def __init__(self, model_type: str = "fp16/ViT-B/32", device: str = None, embedding_dim: int = None, - truncate: bool = True, **kwargs) -> None: + def __init__(self, model_type: str = "fp16/ViT-B/32", device: str = None, embedding_dim: int = None, + truncate: bool = True, **kwargs) -> None: super().__init__(model_type, device, embedding_dim, truncate, **kwargs) '''This class loads the provided clip model directly from cuda in float16 version. The inference time is halved with very minor accuracy drop. @@ -484,76 +488,108 @@ def __init__(self, model_type: str = "fp16/ViT-B/32", device: str = None, embed if not self.device.startswith("cuda"): logger.warning(f"The fp16 clip model `{self.model_type} is loaded with device `{self.device}`." - f"FP16 clip model `{self.model_type}` is only available with device `cuda`.\n" - f"With current device `{self.device}`, the model will be loaded in `float32` mode. \n" - f"Please check you cuda availability or try the fp32 version `{self.model_type.replace('fp16/','')}`" - f"Check `https://docs.marqo.ai/0.0.13/Models-Reference/dense_retrieval/#generic-clip-models` for more info.") + f"FP16 clip model `{self.model_type}` is only available with device `cuda`.\n" + f"With current device `{self.device}`, the model will be loaded in `float32` mode. 
\n" + f"Please check you cuda availability or try the fp32 version `{self.model_type.replace('fp16/', '')}`" + f"Check `https://docs.marqo.ai/0.0.13/Models-Reference/dense_retrieval/#generic-clip-models` for more info.") self.model_name = self.model_type.replace("fp16/", "") - def load(self) -> None: # https://github.com/openai/CLIP/issues/30 - self.model, self.preprocess = clip.load(self.model_name, device=self.device, jit=False, download_root=ModelCache.clip_cache_path) + self.model, self.preprocess = clip.load(self.model_name, device=self.device, jit=False, + download_root=ModelCache.clip_cache_path) self.model = self.model.to(self.device) self.tokenizer = clip.tokenize self.model.eval() class OPEN_CLIP(CLIP): - def __init__(self, model_type: str = "open_clip/ViT-B-32-quickgelu/laion400m_e32", device: str = None, embedding_dim: int = None, - truncate: bool = True, **kwargs) -> None: - super().__init__(model_type, device, embedding_dim, truncate , **kwargs) + def __init__(self, model_type: str = "open_clip/ViT-B-32-quickgelu/laion400m_e32", device: str = None, + embedding_dim: int = None, + truncate: bool = True, **kwargs) -> None: + super().__init__(model_type, device, embedding_dim, truncate, **kwargs) + self.preprocess_config = None self.model_name = model_type.split("/", 3)[1] if model_type.startswith("open_clip/") else model_type self.pretrained = model_type.split("/", 3)[2] if model_type.startswith("open_clip/") else model_type + self.model_properties = OpenCLIPModelProperties(**self.model_properties) def load(self) -> None: - # https://github.com/mlfoundations/open_clip - path = self.model_properties.get("localpath", None) or self.model_properties.get("url", None) + if self.model_properties.url or self.model_properties.model_location: + self.model, self.preprocess = self._load_model_and_image_preprocessor_from_checkpoint() + self.tokenizer = self._load_tokenizer_from_checkpoint() + elif self.model_type.startswith("open_clip/"): + self.model, self.preprocess = self._load_model_and_image_preprocessor_from_open_clip_repo() + self.tokenizer = self._load_tokenizer_from_open_clip_repo() + elif self.model_type.startswith("hf-hub:"): + self.model, self.preprocess = self._load_model_and_image_preprocessor_from_hf_repo() + self.tokenizer = self._load_tokenizer_from_hf_repo() + else: + raise InvalidModelPropertiesError( + f"Marqo cannot load the provided open_clip model. 
" + f"Check {marqo_docs.bring_your_own_model()} " + f"for more details on how to load a open_clip model " + ) + self.model.eval() - model_location_presence = ModelProperties.model_location in self.model_properties - if path is None and not model_location_presence: - self.model, _, self.preprocess = open_clip.create_model_and_transforms(self.model_name, - pretrained=self.pretrained, - device=self.device, jit=False, cache_dir=ModelCache.clip_cache_path) - self.tokenizer = open_clip.get_tokenizer(self.model_name) - self.model.eval() + def _load_image_preprocessor(self) -> Callable: + return image_transform_v2(self.preprocess_config) + + def _aggregate_image_preprocessor_config(self) -> PreprocessCfg: + """Aggregate the image preprocessor configuration for the open_clip model.""" + + if self.model_properties.image_preprocessor in [ImagePreprocessor.OpenCLIP, ImagePreprocessor.OpenAI]: + image_preprocess_config = PreprocessCfg(**_pcfg) + elif self.model_properties.image_preprocessor in [ImagePreprocessor.SiCLIP]: + image_preprocess_config = PreprocessCfg(**_slpcfg) + elif self.model_properties.image_preprocessor in [ImagePreprocessor.CLIPA]: + image_preprocess_config = PreprocessCfg(**_apcfg) + elif self.model_properties.image_preprocessor in [ImagePreprocessor.MobileCLIP]: + image_preprocess_config = PreprocessCfg(**_mccfg) else: - if path and model_location_presence: - raise InvalidModelPropertiesError( - "Only one of `url`, `localpath` or `model_location can be specified in " - "model_properties`. Please ensure that only one of these is specified in " - "model_properties and retry.") - logger.info("Detecting custom clip model path. We use generic clip model loading.") - if model_location_presence: - self.model_path = self._download_from_repo() - elif os.path.isfile(path): - self.model_path = path - elif validators.url(path): - self.model_path = download_model(url=path) - else: - raise InvalidModelPropertiesError( - f"Marqo cannot load the custom clip model. " - f"The provided model path `{path}` is neither a local file nor a valid url. " - f"Please check your provided model url and retry. " - f"Check `https://docs.marqo.ai/0.0.13/Models-Reference/dense_retrieval/#generic-clip-models` for more info.") + raise ValueError(f"Invalid image preprocessor {self.model_properties.image_preprocessor}") - self.precision = self.model_properties.get("precision", "fp32") - self.jit = self.model_properties.get("jit", False) - self.mean = self.model_properties.get("mean", None) - self.std = self.model_properties.get("std", None) - self.model, self.preprocess = self.custom_clip_load() - self.tokenizer = self.load_tokenizer() + if self.model_properties.mean: + image_preprocess_config.image_mean = self.model_properties.mean + if self.model_properties.std: + image_preprocess_config.image_std = self.model_properties.std - self.model.eval() + return image_preprocess_config + + def _load_model_and_image_preprocessor_from_checkpoint(self) -> Tuple[torch.nn.Module, Compose]: + """Load the model and image preprocessor from a checkpoint file. + + The checkpoint file can be provided through a URL or a model_location object. 
+ """ + # Load the image preprocessor + self.preprocess_config = self._aggregate_image_preprocessor_config() + self.preprocess = image_transform_v2(self.preprocess_config, is_train=False) + + if self.model_properties.url and self.model_properties.model_location: + raise InvalidModelPropertiesError( + "Only one of url, model_location can be specified in 'model_properties' " + ) + elif self.model_properties.model_location: + self.model_path = self._download_from_repo() + elif self.model_properties.url: + self.model_path = download_model(url=self.model_properties.url) + else: + raise ValueError("The 'url' or 'model_location' is required in 'model_properties' " + "when loading a custom open_clip model through a URL or a model_location object") + + logger.info(f"The name of the custom clip model is {self.model_name}. We use open_clip loader") - def custom_clip_load(self): - self.model_name = self.model_properties.get("name", None) - logger.info(f"The name of the custom clip model is {self.model_name}. We use open_clip load") try: model, _, preprocess = open_clip.create_model_and_transforms( - model_name=self.model_name, jit=self.jit, pretrained=self.model_path, precision=self.precision, - image_mean=self.mean, image_std=self.std, device=self.device, cache_dir=ModelCache.clip_cache_path) + model_name=self.model_name, + jit=self.model_properties.jit, + pretrained=self.model_path, + precision=self.model_properties.precision, + image_mean=self.model_properties.mean, + image_std=self.model_properties.std, + device=self.device, + cache_dir=ModelCache.clip_cache_path + ) return model, preprocess except Exception as e: if (isinstance(e, RuntimeError) and "The file might be corrupted" in str(e)): @@ -561,48 +597,105 @@ def custom_clip_load(self): os.remove(self.model_path) except Exception as remove_e: raise RuntimeError( - f"Marqo encountered an error while attempting to delete a corrupted file `{self.model_path}`. " + f"Marqo encountered an error while attempting to delete a corrupted file '{self.model_path}'. " f"Please report this issue on Marqo's Github Repo and replace the problematic Marqo instance " f"with a new one. \n " f"Error message: `{str(remove_e)}`" ) raise InvalidModelPropertiesError( - f"Marqo encountered a corrupted file when loading open_clip file `{self.model_path}`. " + f"Marqo encountered a corrupted file when loading open_clip file '{self.model_path}'. " f"Marqo has removed this file from the disk. " - f"Some possible causes are: \n" - f"1. the file was not a valid open_clip checkpoint, \n" - f"2. the file was corrupted during download or incompletely downloaded, \n" - f"3. you may have tried to load a `clip` model even though `model_properties['type']` is set to 'open_clip'. \n" + f"Some possible causes are: " + f"1. the file was not a valid open_clip checkpoint, " + f"2. the file was corrupted during download or incompletely downloaded, " + f"3. you may have tried to load a clip model even though model_properties['type'] is set to 'open_clip' " f"Please check and update your model properties and retry. " - f"You can find more details at `https://docs.marqo.ai/0.0.21/Models-Reference/bring_your_own_model/#bring-your-own-clip-model`") + f"You can find more details at {marqo_docs.bring_your_own_model()}") # It is tricky to cacth the error when loading clip model using type = open_clip. Different pytorch version will raise different error. 
- elif isinstance(e, (AttributeError, RuntimeError)) or ("This could be because the operator doesn't exist for this backend" in str(e)): + elif isinstance(e, (AttributeError, RuntimeError)) or ( + "This could be because the operator doesn't exist for this backend" in str(e)): raise InvalidModelPropertiesError( - f"Marqo encountered an error when loading custom open_clip model `{self.model_name}` with " - f"model properties = `{self.model_properties}`. \n" - f"The error message is `{str(e)}`. \n" - f"You may have tried to load a `clip` model even though `model_properties['type']` is set to 'open_clip' \n" + f"Marqo encountered an error when loading custom open_clip model '{self.model_name}' with " + f"model properties = '{self.model_properties.dict()}'. " + f"The error message is {str(e)}. " + f"You may have tried to load a clip model even though model_properties['type'] is set to 'open_clip' " f"Please check and update your model properties and retry. " - f"You can find more details at `https://docs.marqo.ai/0.0.21/Models-Reference/bring_your_own_model/#bring-your-own-clip-model`" + f"You can find more details at {marqo_docs.bring_your_own_model()}" ) - else: raise RuntimeError( - f"Marqo encountered an error when loading custom open_clip model `{self.model_name}` with " - f"model properties = `{self.model_properties}`. \n" - f"The error message is `{str(e)}`. \n" + f"Marqo encountered an error when loading custom open_clip model {self.model_name} with " + f"model properties = {self.model_properties.dict()}. " + f"The error message is {str(e)}. " f"Please check and update your model properties and retry. " - f"You can find more details at `https://docs.marqo.ai/0.0.21/Models-Reference/bring_your_own_model/#bring-your-own-clip-model`" + f"You can find more details at {marqo_docs.bring_your_own_model()}" ) - def load_tokenizer(self): - tokenizer_name = self.model_properties.get("tokenizer", "clip") + def _load_model_and_image_preprocessor_from_hf_repo(self) -> Tuple[torch.nn.Module, Compose]: + """Load the model and image preprocessor from a hf_repo. + + The hf_repo should be provided in the model properties, and it is a string starting with `hf-hub:`. + """ + model, _, preprocess = open_clip.create_model_and_transforms( + self.model_type, + device=self.device, + cache_dir=ModelCache.clip_cache_path, + ) + return model, preprocess + + def _load_model_and_image_preprocessor_from_open_clip_repo(self) -> Tuple[torch.nn.Module, Compose]: + """Load the model and image preprocessor from the marqo model registry. - if tokenizer_name == "clip": - return open_clip.tokenize + The model name should be provided in the model properties, and it is a string starting with `open_clip/`. + """ + self.model_name = self.model_type.split("/", 3)[1] + self.pretrained = self.model_type.split("/", 3)[2] + + model, _, preprocess = open_clip.create_model_and_transforms( + self.model_name, + pretrained=self.pretrained, + device=self.device, + cache_dir=ModelCache.clip_cache_path + ) + return model, preprocess + + def _load_tokenizer_from_checkpoint(self) -> Callable: + if not self.model_properties.tokenizer: + return open_clip.get_tokenizer(self.model_name) else: logger.info(f"Custom HFTokenizer is provided. 
Loading...") - return HFTokenizer(tokenizer_name) + return HFTokenizer(self.model_properties.tokenizer) + + def _load_tokenizer_from_hf_repo(self) -> Callable: + return open_clip.get_tokenizer(self.model_type) + + def _load_tokenizer_from_open_clip_repo(self) -> Callable: + return open_clip.get_tokenizer(self.model_name) + + def _download_from_repo(self): + """Downloads model from an external repo like s3 and returns the filepath + + Returns: + The model's filepath + + Raises: + RunTimeError if an empty filepath is detected. This is important + because OpenCLIP will instantiate a model with random weights, if + a filepath isn't specified, and the model isn't a publicly + available HF or OpenAI one. + """ + model_location: ModelLocation = self.model_properties.model_location + download_model_params = {"repo_location": model_location} + + if model_location.auth_required: + download_model_params['auth'] = self.model_properties.model_auth + + model_file_path = download_model(**download_model_params) + if model_file_path is None or model_file_path == '': + raise RuntimeError( + 'download_model() needs to return a valid filepath to the model! Instead, received ' + f' filepath `{model_file_path}`') + return model_file_path def encode_image(self, images: Union[str, ImageType, List[Union[str, ImageType]]], image_download_headers: Optional[Dict] = None, @@ -623,7 +716,6 @@ def encode_image(self, images: Union[str, ImageType, List[Union[str, ImageType]] assert outputs.shape == _shape_before return self._convert_output(outputs) - def encode_text(self, sentence: Union[str, List[str]], normalize=True) -> FloatTensor: if self.model is None: @@ -647,12 +739,12 @@ def encode_text(self, sentence: Union[str, List[str]], normalize=True) -> FloatT class MULTILINGUAL_CLIP(CLIP): - def __init__(self, model_type: str = "multilingual-clip/ViT-L/14", device: str = None, embedding_dim: int = None, - truncate: bool = True, **kwargs) -> None: + def __init__(self, model_type: str = "multilingual-clip/ViT-L/14", device: str = None, embedding_dim: int = None, + truncate: bool = True, **kwargs) -> None: if not device: raise InternalError("`device` is required for loading MULTILINGUAL CLIP models!") - + self.model_name = model_type self.model_info = get_multilingual_clip_properties()[self.model_name] self.visual_name = self.model_info["visual_model"] @@ -661,19 +753,20 @@ def __init__(self, model_type: str = "multilingual-clip/ViT-L/14", device: str = self.tokenizer = None self.preprocess = None - def load(self) -> None: if self.visual_name.startswith("openai/"): clip_name = self.visual_name.replace("openai/", "") # We must load the model into CPU then transfer it to the desired device, always # The reason is this issue: https://github.com/openai/CLIP/issues/30 - self.visual_model, self.preprocess = clip.load(name = clip_name, device = "cpu", jit = False, download_root=ModelCache.clip_cache_path) + self.visual_model, self.preprocess = clip.load(name=clip_name, device="cpu", jit=False, + download_root=ModelCache.clip_cache_path) self.visual_model = self.visual_model.to(self.device) self.visual_model = self.visual_model.visual elif self.visual_name.startswith("open_clip/"): clip_name = self.visual_name.replace("open_clip/", "") - self.visual_model, _, self.preprocess = open_clip.create_model_and_transforms(model_name=clip_name.split("/")[0], pretrained= clip_name.split("/")[1], device = self.device) + self.visual_model, _, self.preprocess = open_clip.create_model_and_transforms( + model_name=clip_name.split("/")[0], 
pretrained=clip_name.split("/")[1], device=self.device) self.visual_model = self.visual_model.visual self.textual_model = pt_multilingual_clip.MultilingualCLIP.from_pretrained(self.textual_name, self.device) @@ -682,7 +775,6 @@ def load(self) -> None: self.textual_model.eval() self.visual_model.eval() - def encode_text(self, sentence: Union[str, List[str]], normalize=True) -> FloatTensor: if self.textual_model is None: @@ -698,7 +790,6 @@ def encode_text(self, sentence: Union[str, List[str]], normalize=True) -> FloatT return self._convert_output(outputs) - def encode_image(self, images: Union[str, ImageType, List[Union[str, ImageType]]], normalize=True, image_download_headers: Optional[dict] = None) -> FloatTensor: @@ -767,6 +858,3 @@ def get_multilingual_clip_properties() -> Dict: } } return MULTILINGUAL_CLIP_PROPERTIES - - - diff --git a/src/marqo/s2_inference/models/open_clip_model_properties.py b/src/marqo/s2_inference/models/open_clip_model_properties.py new file mode 100644 index 000000000..d87a094a1 --- /dev/null +++ b/src/marqo/s2_inference/models/open_clip_model_properties.py @@ -0,0 +1,36 @@ + +from enum import Enum +from typing import Optional, List + +from pydantic import Field + +from marqo.base_model import MarqoBaseModel +from marqo.tensor_search.models.private_models import ModelLocation, ModelAuth + + +class ImagePreprocessor(str, Enum): + SiCLIP = "SiCLIP" + OpenAI = "OpenAI" + OpenCLIP = "OpenCLIP" + MobileCLIP = "MobileCLIP" + CLIPA = "CLIPA" + + +class Precision(str, Enum): + FP32 = "fp32" + FP16 = "fp16" + + +class OpenCLIPModelProperties(MarqoBaseModel): + name: str + type: str + jit: bool = False + precision: Precision = Precision.FP32 + url: Optional[str] = None + model_location: Optional[ModelLocation] = Field(default=None, alias="modelLocation") + tokenizer: Optional[str] = None + model_auth: Optional[ModelAuth] = Field(default=None, alias="modelAuth") + image_preprocessor: ImagePreprocessor = Field(default=ImagePreprocessor.OpenCLIP, alias="imagePreprocessor") + mean: Optional[List[float]] = None + std: Optional[List[float]] = None + size: Optional[int] = None \ No newline at end of file diff --git a/src/marqo/s2_inference/processing/custom_clip_utils.py b/src/marqo/s2_inference/processing/custom_clip_utils.py index 1fc9efef0..cd069350c 100644 --- a/src/marqo/s2_inference/processing/custom_clip_utils.py +++ b/src/marqo/s2_inference/processing/custom_clip_utils.py @@ -32,17 +32,17 @@ def basic_clean(text): class HFTokenizer: # HuggingFace tokenizer wrapper # Check https://github.com/mlfoundations/open_clip/blob/16e229c596cafaec46a4defaf27e0e30ffcca12d/src/open_clip/tokenizer.py#L188-L201 - def __init__(self, tokenizer_name:str): + def __init__(self, tokenizer_name: str): from transformers import AutoTokenizer self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) - def __call__(self, texts:Union[str, List[str]], context_length:int=77) -> torch.Tensor: + def __call__(self, texts: Union[str, List[str]]) -> torch.Tensor: # same cleaning as for default tokenizer, except lowercasing # adding lower (for case-sensitive tokenizers) will make it more robust but less sensitive to nuance if isinstance(texts, str): texts = [texts] texts = [whitespace_clean(basic_clean(text)) for text in texts] - input_ids = self.tokenizer(texts, return_tensors='pt', max_length=context_length, padding='max_length', truncation=True).input_ids + input_ids = self.tokenizer(texts, return_tensors='pt', padding='max_length', truncation=True).input_ids return input_ids From 
0685691b50fc82da7dda68ff079881659ed12551 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 21 Aug 2024 12:56:14 +1000 Subject: [PATCH 02/63] Finish tests --- requirements.dev.txt | 2 +- src/marqo/core/inference/__init__.py | 0 src/marqo/core/inference/models/__init__.py | 0 .../inference/models/abstract_clip_model.py | 133 ++++ .../core/inference/models/abstract_model.py | 14 + .../models/open_clip_model_properties.py | 62 ++ src/marqo/core/inference/models/utils.py | 236 +++++++ src/marqo/s2_inference/clip_utils.py | 90 +-- src/marqo/s2_inference/model_registry.py | 12 + .../models/open_clip_model_properties.py | 36 - .../s2_inference/open_clip_models/__init__.py | 0 .../test_marqo_fashion_clip.py | 615 ++++++++++++++++++ .../test_open_clip_model_load.py | 149 +++++ 13 files changed, 1275 insertions(+), 74 deletions(-) create mode 100644 src/marqo/core/inference/__init__.py create mode 100644 src/marqo/core/inference/models/__init__.py create mode 100644 src/marqo/core/inference/models/abstract_clip_model.py create mode 100644 src/marqo/core/inference/models/abstract_model.py create mode 100644 src/marqo/core/inference/models/open_clip_model_properties.py create mode 100644 src/marqo/core/inference/models/utils.py delete mode 100644 src/marqo/s2_inference/models/open_clip_model_properties.py create mode 100644 tests/s2_inference/open_clip_models/__init__.py create mode 100644 tests/s2_inference/open_clip_models/test_marqo_fashion_clip.py create mode 100644 tests/s2_inference/open_clip_models/test_open_clip_model_load.py diff --git a/requirements.dev.txt b/requirements.dev.txt index d19618a73..f93bb4f0a 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -47,7 +47,7 @@ cachetools==5.3.1 # to fix ARM64 build scikit-learn error platformdirs==3.5.0 safetensors==0.4.1 -timm==0.9.12 +timm==1.0.8 flatbuffers==23.5.9 certifi==2019.11.28 diff --git a/src/marqo/core/inference/__init__.py b/src/marqo/core/inference/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/marqo/core/inference/models/__init__.py b/src/marqo/core/inference/models/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/marqo/core/inference/models/abstract_clip_model.py b/src/marqo/core/inference/models/abstract_clip_model.py new file mode 100644 index 000000000..2be6f6253 --- /dev/null +++ b/src/marqo/core/inference/models/abstract_clip_model.py @@ -0,0 +1,133 @@ +from abc import abstractmethod + +from PIL import UnidentifiedImageError + +from marqo.core.inference.models.abstract_model import AbstractModel +from marqo.s2_inference.types import * +from marqo.core.inference.models.utils import _is_image, format_and_load_CLIP_images, format_and_load_CLIP_image +from marqo.s2_inference.logger import get_logger +import torch + +logger = get_logger(__name__) + + +class AbstractCLIPModel(AbstractModel): + """Abstract base class for CLIP models. + + Attributes: + model_tag (str): The tag of the model. It is used to identify the model in the model registry. + device (str): The device to load the model on, typically 'cpu' or 'cuda'. + embedding_dim (int, optional): The dimensionality of the model's embeddings. If not provided, + it should be inferred from the model. + truncate (bool): Indicates whether the text should be truncated to a smaller size in the tokenizer. + model_properties (dict): A dictionary containing additional properties or configurations + specific to the model. Defaults to an empty dictionary if not provided. 
+ model: The actual CLIP model instance, initialized to `None` and to be set by subclasses. + tokenizer: The tokenizer associated with the model, initialized to `None` and to be set by subclasses. + preprocess: The preprocessing pipeline for the model, initialized to `None` and to be set by subclasses. + """ + + def __init__(self, model_type: str, + device: Optional[str] = None, + embedding_dim: Optional[int] = None, + truncate: bool = True, + model_properties: Optional[dict] = None, + **kwargs): + """Instantiate the abstract CLIP model. + + Args: + model_type (str): The type of the model. + device (str): The device to load the model on. + embedding_dim (int): The embedding dimension of the model. + truncate (bool): Whether to truncate the model. + model_properties (dict): The properties of the model. + """ + self.model_tag = model_type + + if not device: + raise ValueError("`device` is required for loading CLIP models!") + self.device = device + + self.embedding_dim = embedding_dim + self.truncate = truncate + + self.model_properties = model_properties + if self.model_properties is None: + self.model_properties = dict() + + self.model = None + self.tokenizer = None + self.preprocess = None + + @abstractmethod + def load_tokenizer(self): + """Load tokenizer.""" + pass + + @abstractmethod + def encode_text(self, inputs: Union[str, List[str]], normalize: bool = True) -> FloatTensor: + pass + + @abstractmethod + def encode_image(self, inputs, normalize: bool = True, image_download_headers: dict = None) -> FloatTensor: + pass + + def encode(self, inputs: Union[str, ImageType, List[Union[str, ImageType]]], + default: str = 'text', normalize=True, **kwargs) -> FloatTensor: + infer = kwargs.pop('infer', True) + + if infer and _is_image(inputs): + is_image = True + else: + if default == 'text': + is_image = False + elif default == 'image': + is_image = True + else: + raise UnidentifiedImageError(f"expected default='image' or default='text' but received {default}") + + if is_image: + logger.debug('image') + image_download_headers = kwargs.get("image_download_headers", dict()) + return self.encode_image(inputs, normalize=normalize, image_download_headers=image_download_headers) + else: + logger.debug('text') + return self.encode_text(inputs, normalize=normalize) + + def _convert_output(self, output): + if self.device == 'cpu': + return output.numpy() + elif self.device.startswith('cuda'): + return output.cpu().numpy() + + @staticmethod + def normalize(outputs): + return outputs.norm(dim=-1, keepdim=True) + + def _preprocess_images(self, images: Union[str, ImageType, List[Union[str, ImageType, Tensor]], Tensor], + image_download_headers: Optional[Dict] = None) -> Tensor: + """Preprocess the input image to be ready for the model. 
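+        Strings are loaded via `format_and_load_CLIP_image` (from a local path or a URL); inputs that
+        are already `torch.Tensor` are assumed to be preprocessed and are stacked as-is, without
+        re-applying `self.preprocess`.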
+
+        Args:
+            images (Union[str, ImageType, List[Union[str, ImageType, Tensor]], Tensor]): input image,
+                can be a str(url), a PIL image, or a tensor, or a list of them
+            image_download_headers (Optional[Dict]): headers for the image download
+        Return:
+            Tensor: the processed image tensor with shape (batch_size, channel, n_px, n_px)
+        """
+        if self.model is None:
+            self.load()
+        if image_download_headers is None:
+            image_download_headers = dict()
+
+        # default to batch encoding
+        if isinstance(images, list):
+            image_input: List[Union[ImageType, Tensor]] \
+                = format_and_load_CLIP_images(images, image_download_headers)
+        else:
+            image_input: List[Union[ImageType, Tensor]] = [format_and_load_CLIP_image(images, image_download_headers)]
+
+        image_input_processed: Tensor = torch.stack([self.preprocess(_img).to(self.device) \
+            if not isinstance(_img, torch.Tensor) else _img \
+            for _img in image_input])
+        return image_input_processed
\ No newline at end of file
diff --git a/src/marqo/core/inference/models/abstract_model.py b/src/marqo/core/inference/models/abstract_model.py
new file mode 100644
index 000000000..944402242
--- /dev/null
+++ b/src/marqo/core/inference/models/abstract_model.py
@@ -0,0 +1,14 @@
+from abc import ABC, abstractmethod
+
+
+class AbstractModel(ABC):
+    """This is the abstract base class for all models in Marqo."""
+
+    @abstractmethod
+    def load(self):
+        """Load the model."""
+        pass
+
+    @abstractmethod
+    def encode(self):
+        pass
\ No newline at end of file
diff --git a/src/marqo/core/inference/models/open_clip_model_properties.py b/src/marqo/core/inference/models/open_clip_model_properties.py
new file mode 100644
index 000000000..80634994e
--- /dev/null
+++ b/src/marqo/core/inference/models/open_clip_model_properties.py
@@ -0,0 +1,62 @@
+
+from enum import Enum
+from typing import Optional, List
+
+from pydantic import Field
+
+from marqo.base_model import MarqoBaseModel
+from marqo.tensor_search.models.private_models import ModelLocation, ModelAuth
+
+
+class ImagePreprocessor(str, Enum):
+    SigLIP = "SigLIP"
+    OpenAI = "OpenAI"
+    OpenCLIP = "OpenCLIP"
+    MobileCLIP = "MobileCLIP"
+    CLIPA = "CLIPA"
+
+
+class Precision(str, Enum):
+    FP32 = "fp32"
+    FP16 = "fp16"
+
+
+class OpenCLIPModelProperties(MarqoBaseModel):
+    """
+    A class to represent the properties of an OpenCLIP model.
+
+    Attributes:
+        name: The name of the model. It can be the name of the model for loading information. e.g., the
+            architecture name of the model, the name of the model in the Hugging Face model hub, etc. It might be
+            the same as the model tag but this is not necessary.
+        type: The type of the model. It should be 'open_clip'.
+        jit: A boolean indicating whether the model is JIT compiled.
+        precision: The precision of the model. It should be either 'fp32' or 'fp16'.
+        url: The URL of the model checkpoint. It is optional.
+        model_location: The location of the model. It is optional.
+        tokenizer: The name of the tokenizer. It is optional.
+        model_auth: The authentication information for the model. It is optional.
+        image_preprocessor: The image preprocessor used by the model. It should be one of the values in the
+            ImagePreprocessor enum.
+        mean: The mean values for the image preprocessor. It is optional. If provided, it will override the
+            default mean values of the image preprocessor.
+        std: The standard deviation values for the image preprocessor. It is optional. If provided, it will
+            override the default standard deviation values of the image preprocessor.
+        size: The size of the image. 
It is optional. If provided, it will override the default size of the image. + note: A note about the model. It is optional. + pretrained: The name of the pretrained model. It is optional. + """ + name: str + type: str + jit: bool = False + precision: Precision = Precision.FP32 + url: Optional[str] = None + model_location: Optional[ModelLocation] = Field(default=None, alias="modelLocation") + tokenizer: Optional[str] = None + model_auth: Optional[ModelAuth] = Field(default=None, alias="modelAuth") + image_preprocessor: ImagePreprocessor = Field(default=ImagePreprocessor.OpenCLIP, alias="imagePreprocessor") + mean: Optional[List[float]] = None + std: Optional[List[float]] = None + size: Optional[int] = None + note: Optional[str] = None + pretrained: Optional[str] = None \ No newline at end of file diff --git a/src/marqo/core/inference/models/utils.py b/src/marqo/core/inference/models/utils.py new file mode 100644 index 000000000..65c158e20 --- /dev/null +++ b/src/marqo/core/inference/models/utils.py @@ -0,0 +1,236 @@ +import os +from io import BytesIO + +import certifi +import numpy as np +import pycurl +import requests +import torch +import validators +from PIL import Image, UnidentifiedImageError +from requests.utils import requote_uri + +from marqo import marqo_docs +from marqo.api.exceptions import InternalError +from marqo.s2_inference.errors import ImageDownloadError +from marqo.s2_inference.types import * +from marqo.tensor_search.telemetry import RequestMetrics + +# TODO Merge this with the one in clip_utils in the future refactoring + +DEFAULT_HEADERS = {'User-Agent': 'Marqobot/1.0'} + + +def get_allowed_image_types(): + return {'.jpg', '.png', '.bmp', '.jpeg'} + + +def _is_image(inputs: Union[str, List[Union[str, ImageType, ndarray]]]) -> bool: + # some logic to determine if something is an image or not + # assume the batch is the same type + # maybe we use something like this https://github.com/ahupp/python-magic + + _allowed = get_allowed_image_types() + + # we assume the batch is this way if a list + # otherwise apply over each element + if isinstance(inputs, list): + + if len(inputs) == 0: + raise UnidentifiedImageError("received empty list, expected at least one element.") + + thing = inputs[0] + else: + thing = inputs + + # if it is a string, determine if it is a local file or url + if isinstance(thing, str): + name, extension = os.path.splitext(thing.lower()) + + # if it has the correct extension, asssume yes + if extension in _allowed: + return True + + # if it is a local file without extension, then raise an error + if os.path.isfile(thing): + # we could also read the first part of the file and infer + raise UnidentifiedImageError( + f"local file [{thing}] extension {extension} does not match allowed file types of {_allowed}") + else: + # if it is not a local file and does not have an extension + # check if url + if validators.url(thing): + return True + else: + return False + + # if it is an array, then it is an image + elif isinstance(thing, (ImageType, ndarray, Tensor)): + return True + else: + raise UnidentifiedImageError(f"expected type Image or str for inputs but received type {type(thing)}") + + +def format_and_load_CLIP_images(images: List[Union[str, ndarray, ImageType]], image_download_headers: dict) -> List[ + ImageType]: + """takes in a list of strings, arrays or urls and either loads and/or converts to PIL + for the clip model + + Args: + images (List[Union[str, np.ndarray, ImageType]]): list of file locations or arrays (can be mixed) + + Raises: + TypeError: 
if the input is not a list.
+
+    Returns:
+        List[ImageType]: list of PIL images
+    """
+    if not isinstance(images, list):
+        raise TypeError(f"expected list but received {type(images)}")
+
+    results = []
+    for image in images:
+        results.append(format_and_load_CLIP_image(image, image_download_headers))
+
+    return results
+
+
+def format_and_load_CLIP_image(image: Union[str, ndarray, ImageType, Tensor],
+                               image_download_headers: dict) -> Union[ImageType, Tensor]:
+    """standardizes the input to be a PIL image
+
+    Args:
+        image (Union[str, np.ndarray, ImageType, Tensor]): can be a local file, url, array or a tensor
+
+    Raises:
+        UnidentifiedImageError: if the input does not match the allowed types of
+            str, np.ndarray, ImageType or Tensor
+
+    Returns:
+        the standardized image:
+            ImageType: PIL image if input is a string, an array or a PIL image
+            Tensor: torch tensor if input is a torch tensor
+    """
+    # check for the input type
+    if isinstance(image, str):
+        img = load_image_from_path(image, image_download_headers)
+    elif isinstance(image, np.ndarray):
+        img = Image.fromarray(image.astype('uint8'), 'RGB')
+    elif isinstance(image, torch.Tensor):
+        img = image
+    elif isinstance(image, ImageType):
+        img = image
+    else:
+        raise UnidentifiedImageError(f"input of type {type(image)} "
+                                     f"did not match allowed types of str, np.ndarray, ImageType, Tensor")
+
+    return img
+
+
+def load_image_from_path(image_path: str, image_download_headers: dict, timeout_ms=3000,
+                         metrics_obj: Optional[RequestMetrics] = None) -> ImageType:
+    """Loads an image into PIL from a string path that is either local or a url
+
+    Args:
+        image_path (str): Local or remote path to image.
+        image_download_headers (dict): header for the image download
+        timeout_ms (int): timeout (in milliseconds), for the whole request
+    Raises:
+        UnidentifiedImageError: If the path is neither a valid local file nor a url,
+            or if the image is irretrievable or unprocessable.
+
+    Returns:
+        ImageType: In-memory PIL image.
+    """
+    if os.path.isfile(image_path):
+        img = Image.open(image_path)
+    elif validators.url(image_path):
+        if metrics_obj is not None:
+            metrics_obj.start(f"image_download.{image_path}")
+        try:
+            img_io: BytesIO = download_image_from_url(image_path, image_download_headers, timeout_ms)
+            img = Image.open(img_io)
+        except ImageDownloadError as e:
+            raise UnidentifiedImageError(str(e)) from e
+        finally:
+            if metrics_obj is not None:
+                metrics_obj.stop(f"image_download.{image_path}")
+    else:
+        raise UnidentifiedImageError(f"Input str of {image_path} is not a local file or a valid url. "
+                                     f"If you are using Marqo Cloud, please note that images can only be downloaded "
+                                     f"from a URL and local files are not supported. "
+                                     f"If you are running Marqo in a Docker container, you will need to use a Docker "
+                                     f"volume so that your container can access host files. "
+                                     f"For more information, please refer to: "
+                                     f"{marqo_docs.indexing_images()}")
+
+    return img
+
+
+def download_image_from_url(image_path: str, image_download_headers: dict, timeout_ms: int = 3000) -> BytesIO:
+    """Download an image from a URL using pycurl and return it as a BytesIO buffer.
+
+    Args:
+        image_path (str): URL to the image.
+        image_download_headers (dict): Headers for the image download.
+        timeout_ms (int): Timeout in milliseconds, for the whole request.
+
+    Returns:
+        buffer (BytesIO): The image as a BytesIO object.
+
+    Raises:
+        ImageDownloadError: If the image download fails. 
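+
+    A sketch of typical usage (the URL is a hypothetical placeholder, for illustration only):
+
+        buffer = download_image_from_url("https://example.com/image.jpg", {})
+        image = Image.open(buffer)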
+ """ + + if not isinstance(timeout_ms, int): + raise InternalError(f"timeout must be an integer but received {timeout_ms} of type {type(timeout_ms)}") + + try: + encoded_url = encode_url(image_path) + except UnicodeEncodeError as e: + raise ImageDownloadError(f"Marqo encountered an error when downloading the image url {image_path}. " + f"The url could not be encoded properly. Original error: {e}") + buffer = BytesIO() + c = pycurl.Curl() + c.setopt(pycurl.CAINFO, certifi.where()) + c.setopt(pycurl.URL, encoded_url) + c.setopt(pycurl.WRITEDATA, buffer) + c.setopt(pycurl.TIMEOUT_MS, timeout_ms) + c.setopt(pycurl.FOLLOWLOCATION, 1) + + headers = DEFAULT_HEADERS.copy() + headers.update(image_download_headers) + c.setopt(pycurl.HTTPHEADER, [f"{k}: {v}" for k, v in headers.items()]) + + try: + c.perform() + if c.getinfo(pycurl.RESPONSE_CODE) != 200: + raise ImageDownloadError(f"image url `{image_path}` returned {c.getinfo(pycurl.RESPONSE_CODE)}") + except pycurl.error as e: + raise ImageDownloadError(f"Marqo encountered an error when downloading the image url {image_path}. " + f"The original error is: {e}") + finally: + c.close() + buffer.seek(0) + return buffer + + +def encode_url(url: str) -> str: + """ + Encode a URL to a valid format with only ASCII characters and reserved characters using percent-encoding. + + In version 2.8, we replaced the requests library with pycurl for image downloads. Consequently, we need to implement + the URL encoding function ourselves. This function replicates the encoding behavior of the + 'requests.utils.requote_uri' function from the requests library. + + Args: + url (str): The URL to encode. + + Returns: + str: The encoded URL. + + Raises: + UnicodeEncodeError: If the URL cannot be encoded properly. + + """ + return requests.utils.requote_uri(url) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 38234b191..6eaffd705 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -13,17 +13,18 @@ from PIL import Image, UnidentifiedImageError from multilingual_clip import pt_multilingual_clip from open_clip.pretrained import _pcfg, _slpcfg, _apcfg, _mccfg -from open_clip.transform import image_transform_v2, PreprocessCfg +from open_clip.transform import image_transform_v2, PreprocessCfg, merge_preprocess_dict from requests.utils import requote_uri from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize from torchvision.transforms import InterpolationMode from marqo import marqo_docs from marqo.api.exceptions import InternalError +from marqo.core.inference.models.abstract_clip_model import AbstractCLIPModel +from marqo.core.inference.models.open_clip_model_properties import OpenCLIPModelProperties, ImagePreprocessor from marqo.s2_inference.configs import ModelCache from marqo.s2_inference.errors import InvalidModelPropertiesError, ImageDownloadError from marqo.s2_inference.logger import get_logger -from marqo.s2_inference.models.open_clip_model_properties import OpenCLIPModelProperties, ImagePreprocessor from marqo.s2_inference.processing.custom_clip_utils import HFTokenizer, download_model from marqo.s2_inference.types import * from marqo.tensor_search.enums import ModelProperties, InferenceParams @@ -36,7 +37,8 @@ OPENAI_DATASET_STD = (0.26862954, 0.26130258, 0.27577711) BICUBIC = InterpolationMode.BICUBIC DEFAULT_HEADERS = {'User-Agent': 'Marqobot/1.0'} - +HF_HUB_PREFIX = "hf-hub:" +MARQO_OPEN_CLIP_REGISTRY_PREFIX = "open_clip/" def 
get_allowed_image_types(): return set(('.jpg', '.png', '.bmp', '.jpeg')) @@ -293,9 +295,11 @@ def __init__(self, model_type: str = "ViT-B/32", device: str = None, embedding_d if not device: raise InternalError("`device` is required for loading CLIP models!") self.device = device + self.model = None self.tokenizer = None - self.processor = None + self.preprocess = None + self.embedding_dimension = embedding_dim self.truncate = truncate self.model_properties = kwargs.get("model_properties", dict()) @@ -504,34 +508,48 @@ def load(self) -> None: self.model.eval() -class OPEN_CLIP(CLIP): - def __init__(self, model_type: str = "open_clip/ViT-B-32-quickgelu/laion400m_e32", device: str = None, - embedding_dim: int = None, - truncate: bool = True, **kwargs) -> None: - super().__init__(model_type, device, embedding_dim, truncate, **kwargs) +class OPEN_CLIP(AbstractCLIPModel): + def __init__( + self, + model_type: str = "open_clip/ViT-B-32-quickgelu/laion400m_e32", + device: Optional[str] = None, + embedding_dim: Optional[int] = None, + truncate: bool = True, + model_properties: Optional[Dict] = None, + model_auth: Optional[Dict] = None + ) -> None: + + super().__init__(model_type, device, embedding_dim, truncate, model_properties) + + # model_auth gets passed through add_docs and search requests: + self.model_auth = model_auth self.preprocess_config = None self.model_name = model_type.split("/", 3)[1] if model_type.startswith("open_clip/") else model_type self.pretrained = model_type.split("/", 3)[2] if model_type.startswith("open_clip/") else model_type self.model_properties = OpenCLIPModelProperties(**self.model_properties) def load(self) -> None: + """Load the open_clip model and tokenizer.""" if self.model_properties.url or self.model_properties.model_location: self.model, self.preprocess = self._load_model_and_image_preprocessor_from_checkpoint() self.tokenizer = self._load_tokenizer_from_checkpoint() - elif self.model_type.startswith("open_clip/"): - self.model, self.preprocess = self._load_model_and_image_preprocessor_from_open_clip_repo() - self.tokenizer = self._load_tokenizer_from_open_clip_repo() - elif self.model_type.startswith("hf-hub:"): + elif self.model_properties.name.startswith(HF_HUB_PREFIX): self.model, self.preprocess = self._load_model_and_image_preprocessor_from_hf_repo() self.tokenizer = self._load_tokenizer_from_hf_repo() + elif self.model_tag.startswith(MARQO_OPEN_CLIP_REGISTRY_PREFIX): + self.model, self.preprocess = self._load_model_and_image_preprocessor_from_open_clip_repo() + self.tokenizer = self._load_tokenizer_from_open_clip_repo() else: raise InvalidModelPropertiesError( f"Marqo cannot load the provided open_clip model. 
" f"Check {marqo_docs.bring_your_own_model()} " - f"for more details on how to load a open_clip model " + f"for more details on the supported methods to open_clip model " ) self.model.eval() + def load_tokenizer(self): + pass + def _load_image_preprocessor(self) -> Callable: return image_transform_v2(self.preprocess_config) @@ -539,22 +557,23 @@ def _aggregate_image_preprocessor_config(self) -> PreprocessCfg: """Aggregate the image preprocessor configuration for the open_clip model.""" if self.model_properties.image_preprocessor in [ImagePreprocessor.OpenCLIP, ImagePreprocessor.OpenAI]: - image_preprocess_config = PreprocessCfg(**_pcfg) - elif self.model_properties.image_preprocessor in [ImagePreprocessor.SiCLIP]: - image_preprocess_config = PreprocessCfg(**_slpcfg) + base_image_preprocess_config = _pcfg() + elif self.model_properties.image_preprocessor in [ImagePreprocessor.SigLIP]: + base_image_preprocess_config = _slpcfg() elif self.model_properties.image_preprocessor in [ImagePreprocessor.CLIPA]: - image_preprocess_config = PreprocessCfg(**_apcfg) + base_image_preprocess_config = _apcfg() elif self.model_properties.image_preprocessor in [ImagePreprocessor.MobileCLIP]: - image_preprocess_config = PreprocessCfg(**_mccfg) + base_image_preprocess_config = _mccfg() else: raise ValueError(f"Invalid image preprocessor {self.model_properties.image_preprocessor}") - if self.model_properties.mean: - image_preprocess_config.image_mean = self.model_properties.mean - if self.model_properties.std: - image_preprocess_config.image_std = self.model_properties.std + aggregated_image_preprocess_config = PreprocessCfg( + **merge_preprocess_dict( + base_image_preprocess_config, self.model_properties.dict(exclude_none=True) + ) + ) - return image_preprocess_config + return aggregated_image_preprocess_config def _load_model_and_image_preprocessor_from_checkpoint(self) -> Tuple[torch.nn.Module, Compose]: """Load the model and image preprocessor from a checkpoint file. @@ -562,9 +581,6 @@ def _load_model_and_image_preprocessor_from_checkpoint(self) -> Tuple[torch.nn.M The checkpoint file can be provided through a URL or a model_location object. """ # Load the image preprocessor - self.preprocess_config = self._aggregate_image_preprocessor_config() - self.preprocess = image_transform_v2(self.preprocess_config, is_train=False) - if self.model_properties.url and self.model_properties.model_location: raise InvalidModelPropertiesError( "Only one of url, model_location can be specified in 'model_properties' " @@ -580,13 +596,13 @@ def _load_model_and_image_preprocessor_from_checkpoint(self) -> Tuple[torch.nn.M logger.info(f"The name of the custom clip model is {self.model_name}. We use open_clip loader") try: - model, _, preprocess = open_clip.create_model_and_transforms( - model_name=self.model_name, + self.preprocess_config = self._aggregate_image_preprocessor_config() + preprocess = image_transform_v2(self.preprocess_config, is_train=False) + model = open_clip.create_model( + model_name=self.model_properties.name, jit=self.model_properties.jit, pretrained=self.model_path, precision=self.model_properties.precision, - image_mean=self.model_properties.mean, - image_std=self.model_properties.std, device=self.device, cache_dir=ModelCache.clip_cache_path ) @@ -637,7 +653,7 @@ def _load_model_and_image_preprocessor_from_hf_repo(self) -> Tuple[torch.nn.Modu The hf_repo should be provided in the model properties, and it is a string starting with `hf-hub:`. 
""" model, _, preprocess = open_clip.create_model_and_transforms( - self.model_type, + model_name=self.model_properties.name, device=self.device, cache_dir=ModelCache.clip_cache_path, ) @@ -648,11 +664,11 @@ def _load_model_and_image_preprocessor_from_open_clip_repo(self) -> Tuple[torch. The model name should be provided in the model properties, and it is a string starting with `open_clip/`. """ - self.model_name = self.model_type.split("/", 3)[1] - self.pretrained = self.model_type.split("/", 3)[2] + self.model_name = self.model_tag.split("/", 3)[1] + self.pretrained = self.model_tag.split("/", 3)[2] model, _, preprocess = open_clip.create_model_and_transforms( - self.model_name, + model_name=self.model_name, pretrained=self.pretrained, device=self.device, cache_dir=ModelCache.clip_cache_path @@ -661,13 +677,13 @@ def _load_model_and_image_preprocessor_from_open_clip_repo(self) -> Tuple[torch. def _load_tokenizer_from_checkpoint(self) -> Callable: if not self.model_properties.tokenizer: - return open_clip.get_tokenizer(self.model_name) + return open_clip.get_tokenizer(self.model_properties.name) else: logger.info(f"Custom HFTokenizer is provided. Loading...") return HFTokenizer(self.model_properties.tokenizer) def _load_tokenizer_from_hf_repo(self) -> Callable: - return open_clip.get_tokenizer(self.model_type) + return open_clip.get_tokenizer(self.model_properties.name) def _load_tokenizer_from_open_clip_repo(self) -> Callable: return open_clip.get_tokenizer(self.model_name) diff --git a/src/marqo/s2_inference/model_registry.py b/src/marqo/s2_inference/model_registry.py index 43ba4c53a..99dd68c5b 100644 --- a/src/marqo/s2_inference/model_registry.py +++ b/src/marqo/s2_inference/model_registry.py @@ -475,6 +475,18 @@ def _get_open_clip_properties() -> Dict: "note": "open_clip model: ViT-B-32-256/datacomp_s34b_b86k", "type": "open_clip", "pretrained": "datacomp_s34b_b86k" + }, + "open_clip/ViT-B-16/marqo-fashionCLIP": { + "name": "hf-hub:Marqo/marqo-fashionCLIP", + "dimensions": 512, + "note": "Marqo's fashionCLIP model", + "type": "open_clip" + }, + "open_clip/ViT-B-16-SigLIP/marqo-fashionSigLIP": { + "name": "hf-hub:Marqo/marqo-fashionSigLIP", + "dimensions": 512, + "note": "Marqo's fashionSigLIP model", + "type": "open_clip" } } return OPEN_CLIP_MODEL_PROPERTIES diff --git a/src/marqo/s2_inference/models/open_clip_model_properties.py b/src/marqo/s2_inference/models/open_clip_model_properties.py deleted file mode 100644 index d87a094a1..000000000 --- a/src/marqo/s2_inference/models/open_clip_model_properties.py +++ /dev/null @@ -1,36 +0,0 @@ - -from enum import Enum -from typing import Optional, List - -from pydantic import Field - -from marqo.base_model import MarqoBaseModel -from marqo.tensor_search.models.private_models import ModelLocation, ModelAuth - - -class ImagePreprocessor(str, Enum): - SiCLIP = "SiCLIP" - OpenAI = "OpenAI" - OpenCLIP = "OpenCLIP" - MobileCLIP = "MobileCLIP" - CLIPA = "CLIPA" - - -class Precision(str, Enum): - FP32 = "fp32" - FP16 = "fp16" - - -class OpenCLIPModelProperties(MarqoBaseModel): - name: str - type: str - jit: bool = False - precision: Precision = Precision.FP32 - url: Optional[str] = None - model_location: Optional[ModelLocation] = Field(default=None, alias="modelLocation") - tokenizer: Optional[str] = None - model_auth: Optional[ModelAuth] = Field(default=None, alias="modelAuth") - image_preprocessor: ImagePreprocessor = Field(default=ImagePreprocessor.OpenCLIP, alias="imagePreprocessor") - mean: Optional[List[float]] = None - std: 
Optional[List[float]] = None - size: Optional[int] = None \ No newline at end of file diff --git a/tests/s2_inference/open_clip_models/__init__.py b/tests/s2_inference/open_clip_models/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/s2_inference/open_clip_models/test_marqo_fashion_clip.py b/tests/s2_inference/open_clip_models/test_marqo_fashion_clip.py new file mode 100644 index 000000000..3a366f8ad --- /dev/null +++ b/tests/s2_inference/open_clip_models/test_marqo_fashion_clip.py @@ -0,0 +1,615 @@ +from unittest import TestCase + +import numpy as np + +from marqo.s2_inference.clip_utils import OPEN_CLIP +from marqo.s2_inference.model_registry import _get_open_clip_properties +from marqo.s2_inference.s2_inference import clear_loaded_models + +OPEN_CLIP_MODEL_PROPERTIES = _get_open_clip_properties() + + +class TestMarqoFashionCLIP(TestCase): + """A test class for Marqo Fashion CLIP model. + + There are generally 3 parts in a OpenCLIP model: + 1. Model, + 2. Tokenizer, and + 3. Image preprocessor. + + We need to ensure all these parts are working as expected. + """ + + IMAGE_CONTENT = "https://github.com/marqo-ai/marqo-FashionCLIP/blob/main/docs/fashion-hippo.png?raw=true" + TEXT_CONTENT = "a hat" + + FASHIONCLIP_IMAGE_EMBEDDING = np.squeeze(np.array([ + [ + -5.5725e-02, -6.1073e-03, -1.1017e-01, -4.7852e-02, 2.4658e-02, + -1.2901e-02, -1.2457e-01, -3.0777e-02, -7.7026e-02, 4.2969e-02, + 4.1046e-03, -6.2752e-03, -3.6011e-02, -2.2781e-02, 1.7120e-02, + -2.4872e-02, 9.0759e-02, 1.7609e-02, -1.3977e-02, 4.3304e-02, + -3.4119e-02, 1.2573e-02, 6.4453e-02, 7.4280e-02, 5.1727e-02, + 5.3009e-02, 2.5253e-02, 6.2073e-02, -2.8687e-02, -3.6163e-02, + 2.3865e-02, -1.0506e-02, 5.1147e-02, -6.6757e-03, 2.1393e-02, + 1.5450e-02, -3.7994e-02, -1.2131e-03, 1.2999e-03, -2.6657e-02, + -1.1925e-02, 1.5160e-02, 1.3252e-02, -1.3971e-03, 3.5645e-02, + -5.7587e-02, -1.0150e-01, 1.7517e-02, -4.9500e-02, 1.5190e-02, + -7.0557e-02, 1.9867e-02, 2.9160e-02, -3.1021e-02, -6.6833e-02, + 1.2962e-02, -3.9062e-03, -3.6713e-02, 1.9531e-03, 3.3539e-02, + 5.5054e-02, -6.8817e-03, 1.7242e-02, 1.2756e-02, -2.6520e-02, + 1.2772e-02, 3.4790e-02, -1.6069e-03, -3.9948e-02, 7.9163e-02, + 7.9346e-02, -2.2621e-03, 2.6588e-03, -5.7259e-03, -6.6650e-02, + 1.6312e-02, 2.2324e-02, -1.0567e-02, -2.1400e-03, -4.0833e-02, + 3.0075e-02, -5.9357e-03, 1.2115e-02, -6.8298e-02, 4.5258e-02, + -3.2440e-02, 4.0161e-02, -3.0632e-03, 7.5607e-03, 2.5833e-02, + 2.5925e-02, 4.6692e-02, -5.8990e-02, -5.0476e-02, 7.5760e-03, + 1.7548e-02, 2.8976e-02, -2.9175e-02, -8.6365e-02, -3.5248e-02, + -1.2573e-02, -1.3763e-02, 9.0866e-03, 4.0817e-03, -1.1108e-02, + -3.2562e-02, -3.5706e-02, 1.1765e-02, 5.7709e-02, -2.0523e-03, + -9.6741e-02, 2.0691e-02, 8.2764e-02, -2.9240e-03, 7.3730e-02, + -2.1896e-02, 1.9180e-02, 4.9652e-02, 3.6896e-02, 2.4109e-02, + 2.9449e-02, -1.3779e-02, -5.7190e-02, 8.9169e-04, 2.9327e-02, + 6.0486e-02, 4.5776e-02, 1.6205e-02, -1.3794e-02, 5.0629e-02, + 5.5573e-02, 2.2278e-02, -7.2876e-02, 9.3842e-04, -6.7810e-02, + -1.9875e-03, 2.6306e-02, -5.6763e-02, -3.8910e-02, -4.0771e-02, + -2.2598e-02, 1.6418e-01, 1.2109e-01, -2.0737e-02, -2.0462e-02, + -4.8027e-03, 4.6173e-02, 7.3395e-03, 5.6702e-02, -4.2297e-02, + -2.6566e-02, -3.4866e-03, -2.8305e-02, 4.2229e-03, -2.3060e-03, + 5.5504e-03, -7.9346e-03, -1.6006e-02, 3.4119e-02, 2.6413e-02, + 1.0461e-01, 3.1921e-02, 9.7427e-03, 3.3875e-02, 8.4534e-03, + 3.5645e-02, -5.5420e-02, -5.7945e-03, -4.0680e-02, -2.9541e-02, + -5.7892e-02, 5.4413e-02, 7.6111e-02, -1.8520e-03, 
-8.4763e-03, + 7.1106e-02, 1.3840e-02, -5.1193e-03, 3.2776e-02, -6.3019e-03, + -5.0598e-02, -1.1072e-01, 4.0771e-02, -3.0640e-02, -4.3396e-02, + 3.1830e-02, -2.6016e-02, 2.3941e-02, 3.0792e-02, 6.1554e-02, + 1.6413e-03, 1.0490e-02, -2.1637e-02, -1.7151e-02, -3.3844e-02, + 6.3171e-02, 4.7119e-02, -5.0873e-02, 3.1555e-02, 3.1372e-02, + -7.4844e-03, -2.7943e-04, 6.3095e-03, 5.5504e-03, -1.1234e-03, + -3.1204e-02, 1.4633e-02, 4.5593e-02, 2.1362e-03, -6.0730e-02, + -8.3069e-02, 2.5158e-03, -4.2114e-02, -3.2043e-02, 2.5360e-02, + 5.9540e-02, -2.2522e-02, -2.3148e-02, -1.6037e-02, 6.2012e-02, + 2.3010e-02, -7.6965e-02, -3.8208e-02, 6.9046e-03, 1.5210e-01, + 6.8016e-03, 3.7766e-03, -3.8239e-02, -2.3682e-02, 4.3488e-02, + -3.2568e-01, -1.4877e-02, 3.2013e-02, 2.0660e-02, 2.8564e-02, + -5.5450e-02, 4.8004e-02, 8.9493e-03, 6.7566e-02, -3.2837e-02, + -4.0863e-02, 2.0542e-03, 1.5839e-02, 3.8727e-02, 1.3123e-02, + -4.5776e-02, 4.1534e-02, 8.6853e-02, 3.0121e-02, 6.6261e-03, + 2.4918e-02, -2.0233e-02, 7.4844e-03, -2.3453e-02, -7.9468e-02, + -1.7868e-02, -2.6855e-03, -1.6891e-02, -1.0260e-01, 5.3644e-04, + 1.6647e-02, -6.1249e-02, 4.1321e-02, 2.6001e-02, -2.2774e-03, + -3.5950e-02, 1.5610e-02, 9.8114e-03, -4.3983e-03, 3.1738e-02, + 7.5645e-03, 1.2646e-03, -1.3489e-02, -3.1185e-03, -4.4769e-02, + -1.1921e-03, -4.2439e-04, 1.6129e-02, -3.5950e-02, -2.1072e-02, + -3.5381e-03, -8.0627e-02, 4.8431e-02, 7.3547e-02, 3.4424e-02, + -1.2360e-02, 5.9235e-02, -1.4595e-02, 1.1108e-02, 6.8970e-03, + 1.6617e-02, 2.0325e-02, 1.0767e-01, 1.2047e-02, -4.0894e-02, + 2.3590e-02, -2.0599e-02, 3.5614e-02, -3.3752e-02, -1.2585e-01, + 3.0502e-02, 2.4292e-02, -2.1484e-02, -5.3528e-02, -5.2002e-02, + 4.9194e-02, 3.1158e-02, 9.1324e-03, 9.8389e-02, -2.5879e-02, + -4.6783e-02, 2.7847e-03, -1.3321e-02, -1.5900e-02, -2.3972e-02, + -4.9744e-02, -3.4084e-03, -6.4148e-02, 6.3591e-03, -9.2041e-02, + -1.4442e-02, -1.0826e-02, -2.3178e-02, -3.3356e-02, 7.0740e-02, + -2.1484e-02, 1.6632e-02, 4.4647e-02, -5.9280e-03, 2.8229e-03, + 1.0475e-02, -2.3865e-02, -1.1765e-02, -4.0054e-03, -2.2629e-02, + 9.6054e-03, 2.4368e-02, -2.0630e-02, -1.8280e-02, -3.2654e-02, + -2.8610e-02, -3.3783e-02, -2.5925e-02, -2.3361e-02, -1.4946e-02, + 1.5778e-02, -7.3242e-02, -6.4754e-04, 1.3916e-01, -7.2754e-02, + -2.3178e-02, -2.6901e-02, 4.9591e-02, 7.3242e-02, 4.2297e-02, + 1.3130e-02, 9.7229e-02, 3.1494e-02, 8.4839e-02, -2.7752e-03, + 2.7802e-02, -1.2550e-02, 5.2155e-02, 7.2083e-02, -2.4750e-02, + 3.4912e-02, -2.8305e-02, 3.0396e-02, -1.7181e-02, 3.9276e-02, + 2.5654e-04, -3.6068e-03, -1.3924e-02, -6.6719e-03, 1.6068e-02, + 1.1711e-02, 2.1179e-02, 2.5635e-03, 8.2397e-03, -9.5825e-03, + 2.1332e-02, 5.9433e-03, 1.8280e-02, -2.1500e-02, 1.4633e-02, + 3.9734e-02, -1.6922e-02, 2.4967e-03, 5.2757e-03, -5.8403e-03, + -1.5686e-02, -1.0506e-02, 1.7334e-02, -9.1675e-02, 3.8879e-02, + -4.3365e-02, -2.6155e-04, -5.5962e-03, 8.0643e-03, 2.3804e-02, + 3.7327e-03, -1.4648e-02, -1.9073e-02, 2.5391e-02, -1.9272e-02, + 1.3924e-02, 1.4328e-02, 4.2511e-02, 5.3375e-02, -1.8082e-02, + 1.3452e-01, -4.5929e-02, 1.7975e-02, -4.3030e-02, 3.9482e-04, + 1.2306e-02, -4.3243e-02, 1.7487e-02, -1.1237e-01, -2.9755e-02, + 1.9867e-02, -5.5481e-02, 2.7512e-02, -4.3304e-02, 2.4475e-02, + -4.9957e-02, 3.2074e-02, -9.3567e-02, 1.3603e-02, 3.9673e-02, + 1.8250e-02, 7.0572e-03, 5.2368e-02, 3.7880e-03, -3.8910e-03, + -3.6896e-02, 2.2690e-02, -2.4979e-02, 1.7738e-03, -2.7222e-02, + -3.2074e-02, 7.6660e-02, 5.4588e-03, 2.2476e-02, 4.2725e-03, + 6.2195e-02, 4.1779e-02, 2.8290e-02, 2.7580e-03, 8.5449e-02, + 
-4.2603e-02, -7.2998e-02, 3.1006e-02, 7.4272e-03, 1.2383e-02, + -8.5205e-02, 6.8787e-02, -1.0872e-02, -3.1250e-02, 3.2288e-02, + 5.0629e-02, -7.0618e-02, 1.1841e-02, -1.0181e-01, 1.0605e-02, + 4.2511e-02, 9.2697e-03, -1.3977e-02, 1.1681e-02, -1.2337e-02, + 3.5645e-02, -5.1117e-02, 3.9673e-02, 3.1891e-02, -9.1614e-02, + -2.5208e-02, 1.0361e-02, 4.6272e-03, -4.0527e-02, 6.0547e-02, + 1.9882e-02, 3.7174e-03, 3.5828e-02, 5.5115e-02, -2.1667e-03, + -4.2000e-03, -3.9185e-02, 4.0649e-02, -4.7729e-02, -2.2568e-02, + 7.1045e-02, -1.0536e-02, 1.4740e-02, -1.0788e-02, 4.2358e-02, + -2.8976e-02, 1.7181e-02, -1.9974e-02, 4.2999e-02, -3.6285e-02, + -4.2999e-02, 4.0497e-02, -2.9007e-02, 2.2339e-02, 2.5314e-02, + -3.3295e-02, -7.2823e-03, 3.1769e-02, 1.0139e-02, -6.7932e-02, + -3.6804e-02, -2.6260e-02 + ] + ])) + + FASHIONCLIP_TEXT_EMBEDDING = np.squeeze(np.array([ + [ + -2.6367e-02, 1.2276e-02, -7.1716e-03, -1.1559e-02, 9.5654e-04, + 2.2583e-02, -4.5052e-03, -1.8921e-02, -1.0292e-02, -3.5496e-03, + -1.8967e-02, -2.8931e-02, 3.0685e-02, 1.2077e-02, 1.0551e-02, + -3.4058e-02, 3.1219e-02, -1.0445e-02, 2.1667e-02, -1.2321e-02, + 6.3057e-03, 2.3849e-02, -3.5126e-02, 2.0035e-02, 1.0124e-02, + 7.0251e-02, -4.2084e-02, 8.8272e-03, -3.6011e-02, 1.4198e-02, + 2.2446e-02, 9.9945e-03, 4.1389e-03, 2.0172e-02, 7.5226e-03, + -1.0307e-02, -8.3313e-02, 3.3020e-02, 4.7058e-02, 1.8372e-02, + 1.4610e-02, -1.4786e-02, -5.1514e-02, 5.0583e-03, -1.6693e-02, + -4.2877e-03, -4.3488e-02, 6.0272e-02, 3.3081e-02, 7.8430e-02, + -3.6682e-02, 1.6708e-02, -9.6283e-03, 5.1270e-03, 1.7761e-02, + 2.5139e-03, 3.8208e-02, -2.9358e-02, -1.9562e-02, 2.5009e-02, + -1.0979e-02, 4.3243e-02, -4.8332e-03, -1.8387e-02, 2.5726e-02, + 4.2206e-02, 4.1504e-03, 5.9624e-03, -3.6621e-02, -2.6798e-03, + -5.8556e-03, -2.3712e-02, -6.1859e-02, -1.1497e-02, -2.7103e-03, + 2.6031e-02, -1.5617e-02, 5.6229e-03, 1.4000e-03, -9.9243e-02, + -3.3386e-02, -1.3130e-02, -2.6138e-02, -1.5778e-02, 5.1636e-02, + 9.8038e-03, 7.4615e-03, 2.9327e-02, -3.9062e-02, 4.8035e-02, + -4.9713e-02, -1.1528e-02, 3.0273e-02, 1.7883e-02, -7.6218e-03, + 3.6011e-02, 9.9945e-03, -1.4488e-02, -4.2114e-02, 1.5686e-02, + 3.9612e-02, -2.9999e-02, -3.1677e-02, 3.3539e-02, 9.7122e-03, + -1.5686e-02, -1.0506e-02, 6.3843e-02, -3.4657e-03, -4.3488e-02, + -2.3499e-02, -1.4862e-02, 6.1127e-02, -2.9419e-02, 5.6854e-02, + -3.0613e-03, 3.9246e-02, 1.2718e-02, -1.6220e-02, 4.3797e-04, + -2.0233e-02, -4.3488e-02, -5.7068e-03, -2.6855e-02, -5.5145e-02, + 2.9869e-03, -2.7969e-02, -2.2812e-02, -4.0070e-02, 3.5496e-03, + 2.7466e-03, 2.7939e-02, 1.0391e-02, 3.4821e-02, -6.4545e-03, + -5.2429e-02, -1.3565e-02, -3.5614e-02, -7.9422e-03, 5.5450e-02, + -3.8086e-02, 6.5186e-02, -2.5894e-02, -2.8397e-02, -1.8585e-02, + 1.2810e-02, 1.4709e-02, 1.6251e-02, -4.3427e-02, -2.5009e-02, + -4.0100e-02, 1.8225e-03, 4.0680e-02, 1.4847e-02, 8.1940e-03, + -9.9716e-03, -3.6530e-02, 6.2988e-02, 3.7292e-02, -1.8799e-02, + 7.3853e-02, 2.9282e-02, -5.4749e-02, -1.2444e-02, -1.0132e-02, + -1.4854e-02, -8.3313e-03, 3.4698e-02, -4.3457e-02, -2.4643e-02, + 3.9795e-02, -1.0216e-02, 6.1462e-02, -1.8524e-02, -4.7951e-03, + 3.1952e-02, 1.4999e-02, -3.4760e-02, -3.9172e-04, 2.4834e-03, + -1.1749e-03, -3.1830e-02, 4.0039e-02, -3.6804e-02, -1.4107e-02, + 6.1218e-02, -1.2894e-02, -3.5400e-03, 1.8524e-02, -1.2585e-01, + -2.3994e-03, 4.1107e-02, 1.4137e-02, -1.9293e-03, -1.1742e-02, + 2.3163e-02, -2.1835e-02, -4.1229e-02, -2.6855e-02, 3.1174e-02, + -3.3936e-02, -3.5839e-03, 1.6113e-02, -2.3727e-02, -9.2239e-03, + 3.5492e-02, 2.3003e-03, 
-2.5589e-02, 2.3224e-02, -1.7410e-02, + -1.0933e-02, -1.2825e-02, -2.7573e-02, -1.7334e-02, -1.5610e-02, + 8.7678e-05, 1.2581e-02, 9.8114e-03, -3.6835e-02, 2.9266e-02, + 4.6967e-02, -6.0120e-02, -3.7842e-02, -2.2507e-02, 3.3508e-02, + -3.6682e-02, 7.2266e-02, -4.9973e-03, -2.8992e-02, -1.2939e-02, + -1.2646e-01, -2.3117e-02, 4.9286e-03, -1.3794e-02, 3.7354e-02, + 5.8655e-02, 2.1973e-02, 6.5765e-03, -1.5434e-02, -8.4290e-02, + 3.7842e-02, 1.0307e-02, 4.6875e-02, -4.8065e-03, -1.5686e-02, + -1.1742e-02, -7.9575e-03, -2.9007e-02, -1.7822e-02, 4.4556e-02, + -1.4877e-02, 1.9531e-02, -2.5101e-02, -3.4760e-02, -1.3435e-02, + -3.4363e-02, -1.1230e-02, 1.0040e-02, 1.7838e-02, 4.3488e-02, + 1.5778e-02, 2.9343e-02, -9.2411e-04, 8.1940e-03, 2.2888e-02, + -6.9702e-02, 4.1962e-02, 3.9291e-03, 2.2186e-02, 4.2023e-02, + -3.4485e-03, 8.9874e-03, -4.5967e-03, -1.6632e-02, 2.7893e-02, + -2.5009e-02, 5.2399e-02, 3.7689e-02, -7.5836e-03, -3.0914e-02, + -1.4191e-03, -2.7832e-02, 2.2888e-02, 7.2144e-02, 1.6144e-02, + 5.1514e-02, -2.4887e-02, -2.3346e-02, -3.5004e-02, 1.3710e-02, + 8.4915e-03, 7.6866e-03, 5.4359e-03, -2.2156e-02, -1.4214e-02, + 2.9190e-02, 5.0751e-02, -3.2883e-03, -2.4338e-02, 1.1625e-03, + 3.5431e-02, -2.1790e-02, -1.3588e-02, -3.7842e-02, 9.9792e-03, + 3.6469e-02, -1.3494e-03, -1.4061e-02, -1.6785e-02, 5.2605e-03, + 7.3853e-03, 3.2776e-02, -2.6932e-02, -2.2297e-03, -4.6570e-02, + -1.6083e-02, 7.6523e-03, -8.5068e-03, 3.9291e-03, -8.6212e-03, + -3.2425e-04, 2.7481e-02, -2.5444e-03, -7.0129e-02, 2.1072e-02, + -4.2633e-02, 5.0049e-03, 3.0136e-02, 3.5858e-02, -3.1590e-06, + 2.7222e-02, -1.9913e-02, 3.2196e-02, -2.1667e-02, 1.9989e-02, + 1.0864e-02, 5.3009e-02, -4.3793e-03, 4.4647e-02, -9.8495e-03, + -2.3117e-02, -1.8936e-02, -3.1158e-02, -2.9877e-02, -2.5726e-02, + -2.8133e-04, -2.5238e-02, -1.1024e-02, -8.9539e-02, 9.3002e-03, + -2.4643e-02, 2.6913e-03, 1.5610e-02, 2.5005e-03, 6.8970e-02, + 5.0690e-02, 2.8915e-02, 1.9241e-02, 2.5040e-02, 4.4983e-02, + 5.2673e-02, -4.2328e-02, 3.1757e-03, 3.3417e-02, -2.6367e-02, + 4.2145e-02, -4.3823e-02, 5.6267e-03, -1.4595e-02, 6.1302e-03, + 1.6083e-02, 8.4000e-03, 8.4457e-03, -3.4271e-02, 2.5757e-02, + 2.9404e-02, 3.3783e-02, 9.8801e-03, -7.9575e-03, 2.5375e-02, + -9.9182e-04, 3.0624e-02, -5.8031e-04, -1.5282e-02, 1.6312e-02, + 3.0121e-02, -1.5631e-03, 4.2236e-02, -6.3057e-03, 7.0457e-03, + -4.3457e-02, -4.3411e-03, -4.7226e-03, 6.0059e-02, 5.0690e-02, + -1.1055e-02, -5.0568e-02, -5.6305e-03, 5.3825e-03, -6.9092e-02, + -1.3533e-03, 5.6610e-02, -2.0370e-02, -7.6561e-03, -4.8141e-03, + 1.4782e-03, 3.0380e-02, -5.7709e-02, -8.0032e-03, 1.3588e-02, + 6.1981e-02, -4.8004e-02, -1.1253e-02, 3.4924e-03, 8.2779e-03, + -2.3621e-02, -1.6449e-02, 2.8503e-02, -1.8707e-02, -2.5528e-02, + -8.9798e-03, -7.1631e-01, -5.3368e-03, -3.7262e-02, -2.0111e-02, + -4.3945e-02, 3.2825e-03, -1.0597e-02, 1.8108e-04, -3.7670e-03, + -1.2215e-02, -4.8676e-02, 1.3428e-02, -4.6234e-02, 1.6159e-02, + 1.0864e-02, -4.4060e-03, -9.1476e-03, -1.2032e-02, 3.1082e-02, + 1.6769e-02, -1.0866e-04, 3.7781e-02, -9.3918e-03, -6.0844e-03, + -6.5956e-03, -1.0597e-02, 3.5492e-02, -4.5166e-02, 3.6411e-03, + 2.7405e-02, -1.2413e-02, 3.5004e-02, -1.4755e-02, 3.1860e-02, + 1.4977e-02, -1.0567e-02, -3.5461e-02, -2.6703e-02, 1.8661e-02, + -1.3786e-02, -1.2131e-03, 6.6328e-04, -9.2712e-02, -4.3091e-02, + -2.2491e-02, -1.0017e-02, -2.5711e-02, -1.5778e-02, 1.7487e-02, + -2.4811e-02, -6.7673e-03, -2.9114e-02, 3.3607e-03, 4.1351e-02, + -4.8065e-02, -6.6299e-03, -1.4008e-02, -2.9354e-03, 4.3121e-02, + 1.6846e-02, 
-2.4307e-02, 4.4037e-02, 3.2318e-02, -2.7908e-02, + -1.8906e-02, -3.2440e-02, 1.5945e-02, 4.0039e-02, 2.4139e-02, + 4.7699e-02, -2.6031e-02, 2.2888e-02, -1.0101e-02, 4.1695e-03, + 1.5427e-02, 5.3101e-03, 9.6283e-03, 9.8816e-02, 3.4149e-02, + -1.9562e-02, -2.6215e-02, -3.6499e-02, -2.0996e-02, 4.8981e-02, + 1.7227e-02, 3.6682e-02, 3.1830e-02, 1.9165e-02, -1.6220e-02, + -3.6896e-02, -5.0583e-03 + ] + ])) + + SiGLIP_IMAGE_EMBEDDING = np.squeeze(np.array([ + [ + -2.4643e-02, 5.8060e-03, -9.6359e-03, -2.4826e-02, 3.4576e-02, + 4.5746e-02, 5.8594e-03, -7.0572e-03, 3.8757e-02, 2.2259e-03, + -6.7902e-03, -2.8095e-03, 1.2321e-02, 2.0950e-02, 1.0811e-02, + 1.5457e-02, 1.3336e-02, 7.0143e-04, -5.6686e-03, 1.0834e-02, + 4.1466e-03, 1.7258e-02, -8.0643e-03, 1.8673e-03, 2.7637e-03, + -1.2978e-02, 2.5772e-02, 4.3907e-03, 5.4108e-02, 2.4438e-05, + -6.9809e-03, -2.6917e-02, -8.9188e-03, -5.6000e-03, 1.3533e-03, + 5.4131e-03, 4.0375e-02, 6.2256e-03, 1.6586e-02, 1.2150e-03, + -2.6428e-02, -2.7161e-02, -1.8723e-02, 2.0782e-02, -1.0582e-02, + 5.2887e-02, 2.0390e-03, 6.8893e-03, -2.5436e-02, -1.3748e-02, + 6.1989e-04, -9.1980e-02, 1.4862e-02, 4.0649e-02, -2.8534e-03, + -4.3121e-02, -3.5229e-03, -1.1902e-02, 2.4628e-02, -2.0157e-02, + -3.8891e-03, 4.9225e-02, -3.4027e-02, 5.0697e-03, 1.1597e-02, + 2.6901e-02, 8.9874e-03, -1.2787e-02, 1.5249e-03, 3.8490e-03, + -1.4725e-02, 5.5122e-03, 2.3254e-02, -3.6030e-03, -1.5898e-03, + 2.3239e-02, -2.4826e-02, 2.1408e-02, 3.0054e-01, 1.8051e-02, + -3.4668e-02, 8.3847e-03, -2.0004e-02, -1.3245e-02, 1.2749e-02, + -8.0872e-02, -2.3746e-03, 1.0681e-02, 4.4098e-02, 1.8219e-02, + -4.3518e-02, -1.7944e-02, 3.0670e-02, 1.0925e-02, 5.8441e-02, + -1.3458e-02, 3.8727e-02, 3.4733e-03, 4.8676e-03, -2.8305e-03, + 1.0132e-02, -4.8447e-03, 1.4677e-03, -1.9562e-02, 4.2391e-04, + -1.2123e-02, 1.3275e-02, -3.1616e-02, -2.2018e-02, -2.4155e-02, + -2.0706e-02, 4.5013e-03, 2.1088e-02, 1.4191e-03, -1.5869e-02, + 3.2074e-02, -1.5945e-02, -1.2255e-03, 1.0910e-02, 8.1406e-03, + 6.5079e-03, -5.0323e-02, 8.2169e-03, 1.2451e-02, 8.8257e-02, + 7.5340e-03, 3.6011e-02, 7.9269e-03, 7.1472e-02, 4.8981e-02, + 2.4887e-02, -4.0932e-03, -2.7603e-02, -3.8940e-02, -2.0599e-02, + -1.2871e-02, 1.1482e-02, 9.9792e-03, 7.2937e-02, -2.8076e-02, + 3.5004e-02, 1.0162e-01, -3.8940e-02, -5.7077e-04, 4.4220e-02, + 3.2776e-02, -1.9569e-03, -5.9586e-03, 1.1497e-02, -1.9730e-02, + -2.1591e-02, 1.1635e-02, 1.2589e-02, 2.8613e-01, 2.3743e-02, + 1.2108e-02, -2.5436e-02, -3.0470e-04, -2.4368e-02, 2.1561e-02, + 9.0332e-03, 3.8788e-02, -7.8918e-02, -4.1122e-03, -5.7007e-02, + -1.0468e-02, 3.7346e-03, -4.7729e-02, -1.9547e-02, 1.8234e-02, + -4.9347e-02, -1.1612e-02, -8.5144e-02, -1.9012e-02, -2.0065e-02, + -1.3939e-02, -2.0599e-02, -7.7934e-03, -3.8879e-02, 1.4679e-02, + 3.5645e-02, 4.3610e-02, -1.4618e-02, 1.3702e-02, 2.2766e-02, + 1.6953e-02, 9.6802e-02, 2.0737e-02, 1.2787e-02, 1.3741e-02, + -4.2267e-03, 6.9160e-03, -1.7609e-02, 9.7427e-03, 3.1921e-02, + 1.0994e-02, 1.9211e-02, -5.6763e-03, 1.0307e-02, -3.0609e-02, + 1.7738e-03, -1.7380e-02, -2.0233e-02, 7.1678e-03, 1.6907e-02, + -1.8616e-02, 4.9896e-03, -2.8210e-03, 2.7756e-02, -7.8297e-04, + 2.7523e-03, 1.6794e-03, -2.0279e-02, 3.5431e-02, 1.4694e-02, + -1.4030e-02, 6.4507e-03, 3.7842e-02, 2.2629e-02, -6.4270e-02, + -5.8937e-03, -6.2408e-03, -1.1620e-02, -2.8290e-02, 3.1494e-02, + -4.1016e-02, 1.1740e-03, -2.9354e-03, -2.0538e-02, -6.4325e-04, + 6.3591e-03, -6.3477e-03, 1.2718e-02, -1.6418e-02, -1.0307e-02, + -4.3793e-02, 2.3590e-02, -3.6804e-02, -1.6830e-02, 3.2257e-02, + 
5.2223e-03, -3.0289e-02, 4.2877e-03, 7.8735e-03, -5.3223e-02, + 1.4595e-02, -2.4597e-02, 1.1616e-03, -1.6937e-02, 1.4702e-02, + -5.2185e-03, -1.2798e-03, -4.8065e-02, -2.0645e-02, -8.1253e-03, + 8.8196e-03, 7.2899e-03, 8.0261e-03, -8.9966e-02, 2.0966e-02, + -5.2948e-02, -2.4734e-02, 9.8877e-03, 5.4108e-02, -1.0078e-02, + -1.3092e-02, -9.3201e-02, -2.0844e-02, -3.8166e-03, -1.3437e-03, + -2.6474e-02, -4.4289e-03, 6.0997e-03, 4.9286e-02, -2.3544e-02, + 5.4199e-02, 4.4128e-02, -6.8665e-03, -3.2013e-02, -4.4708e-03, + -1.0567e-02, 1.1940e-02, -3.3661e-02, 1.0307e-02, -1.4496e-02, + 2.0966e-02, -2.0638e-03, -1.6434e-02, -4.2391e-04, -3.7632e-03, + -1.2947e-02, 5.0468e-03, 1.3268e-02, -1.9806e-02, 2.0355e-02, + 3.4088e-02, -5.4970e-03, -2.5467e-02, -2.6291e-02, -9.1248e-03, + 1.3435e-02, 2.1265e-01, -2.7161e-02, -2.4536e-02, -2.0172e-02, + -8.5449e-03, -1.7120e-02, -6.5735e-02, 3.1464e-02, 2.8076e-02, + 4.0741e-02, -3.2406e-03, 9.0027e-03, -2.3346e-02, 1.3145e-02, + 4.9652e-02, -6.1150e-03, -4.6478e-02, -1.0612e-02, 2.6825e-02, + -7.5760e-03, -3.0327e-03, 1.0338e-02, -1.5404e-02, -7.7209e-02, + 2.3560e-02, 7.4692e-03, 2.7679e-02, -4.9095e-03, -5.4718e-02, + 1.2558e-02, -8.5815e-02, 2.4887e-02, 2.6306e-02, 2.7954e-02, + 1.6586e-02, -8.6451e-04, -1.7654e-02, 1.5701e-02, 8.8074e-02, + 1.8463e-02, -3.2745e-02, 2.2629e-02, 9.0179e-03, -5.4970e-03, + -2.0432e-02, 2.8534e-03, -1.6495e-02, 1.6327e-02, -7.7477e-03, + -6.0158e-03, -6.4888e-03, -1.2131e-02, 2.0416e-02, -1.9409e-02, + 6.6376e-03, 2.6291e-02, -4.3854e-02, 1.7258e-02, -1.1864e-03, + -7.4921e-03, -2.4460e-02, -8.2016e-03, 2.1698e-02, 9.8495e-03, + -9.6359e-03, 5.2032e-03, 1.0284e-02, -4.4739e-02, 6.3286e-03, + 2.5726e-02, -2.8702e-02, 1.1091e-03, -1.0818e-02, 3.9864e-03, + -1.6571e-02, -1.1581e-02, -1.5099e-02, -4.6387e-02, 5.2795e-03, + -4.7302e-04, 6.7566e-02, 1.4404e-02, -2.3117e-02, 1.7838e-02, + 2.2858e-02, -1.5388e-02, 2.9404e-02, -3.3997e-02, -1.7944e-02, + -9.4910e-03, -3.8391e-02, 1.8173e-02, -8.2169e-03, 3.1891e-02, + 5.4207e-03, -2.7008e-02, 5.1239e-02, 5.1842e-03, -2.4872e-02, + 3.5645e-02, -5.7297e-03, -6.1989e-04, 1.9058e-02, -3.5706e-02, + 6.3095e-03, -3.8666e-02, 1.4191e-03, 1.0376e-02, 1.4885e-02, + 2.0447e-02, -1.0757e-02, -2.9349e-04, -2.5768e-03, -8.8730e-03, + -5.6267e-04, 1.6754e-02, -3.2928e-02, 2.7664e-02, -5.1666e-02, + -3.1799e-02, 6.3438e-03, -8.1539e-05, 2.2293e-02, 6.6895e-02, + 1.9379e-02, -1.0269e-02, -2.7809e-03, -2.6077e-02, 4.1733e-03, + -1.2474e-02, -2.9007e-02, -1.5945e-02, 0.0000e+00, -2.5345e-02, + -1.7014e-02, -3.1769e-02, -8.9722e-03, 1.5503e-02, -3.1677e-02, + -1.9646e-03, 2.0020e-02, 1.8616e-02, -1.1511e-01, 3.5820e-03, + -6.2981e-03, -2.5208e-02, 4.0527e-02, 8.9693e-04, 2.2079e-02, + -1.6296e-02, -1.7883e-02, 3.1021e-02, 1.9407e-03, -1.0480e-01, + 2.2736e-02, -2.7985e-02, 3.5889e-02, 1.1168e-03, 1.9180e-02, + -1.6724e-02, -3.3783e-02, -1.3161e-02, -2.5269e-02, 1.2207e-03, + 2.9770e-02, -1.2344e-02, 6.0310e-03, -6.5979e-02, -5.8022e-03, + 4.1168e-02, -8.0795e-03, 1.7452e-03, 2.1133e-02, 2.0737e-02, + 1.4549e-02, -3.4454e-02, -2.3926e-02, 9.0075e-04, -2.6443e-02, + 5.5771e-03, -6.9695e-03, 2.9297e-03, 3.4668e-02, 1.5659e-03, + 6.1951e-03, -1.9836e-02, 1.9569e-03, -4.1687e-02, -4.1382e-02, + -7.5378e-02, -1.1505e-02, 2.2522e-02, 2.8641e-02, 1.3206e-02, + -1.9470e-02, 8.8882e-03, -2.2034e-02, 1.2695e-02, 1.6296e-02, + -1.0284e-02, 1.5327e-02, -9.0561e-03, -4.6310e-03, 1.0254e-02, + 9.0179e-03, -5.9700e-03, -5.9937e-02, 3.6621e-02, -1.3626e-02, + 1.6037e-02, 9.7871e-05, -1.7776e-03, -5.1300e-02, 
1.8635e-03, + -4.8752e-03, 1.7838e-02, -1.3603e-02, -5.4436e-03, 1.6205e-02, + -4.2999e-02, -3.2867e-02, -4.1870e-02, 8.4782e-04, 2.0187e-02, + -3.0651e-03, -4.4098e-02, -3.9978e-02, -3.3295e-02, 1.4565e-02, + -5.4596e-02, -8.7585e-03, 1.3046e-03, -1.8982e-02, 3.3752e-02, + 4.2816e-02, 1.3781e-03, 2.0615e-02, -5.7281e-02, 3.6285e-02, + -9.0485e-03, 1.6205e-02, 1.0498e-02, -3.6072e-02, 2.1191e-03, + 1.7792e-02, 5.4230e-02, -2.7054e-02, 2.2583e-02, -1.6602e-02, + -2.2335e-03, -2.9663e-02, -8.9645e-03, -1.9333e-02, -2.4902e-02, + 9.3746e-04, 5.2681e-03, 3.6652e-02, -1.5320e-02, -1.0841e-02, + -5.4718e-02, 2.5574e-02, -3.2349e-02, 5.4131e-03, -3.8300e-02, + 2.1423e-02, -1.4488e-02, -6.1951e-03, -9.8038e-03, -2.8107e-02, + 2.3315e-02, 6.0272e-02, 6.7139e-03, -2.5833e-02, 1.7929e-02, + -9.6130e-03, 6.6948e-03, 2.5162e-02, 3.2604e-05, 2.3438e-02, + -1.7899e-02, 4.7951e-03, -1.5087e-03, -1.7900e-03, -1.9653e-02, + 1.2611e-02, -2.0905e-02, -1.4793e-02, -2.5768e-03, 3.6102e-02, + -3.9429e-02, -7.3929e-03, 1.1635e-02, -7.4768e-03, -8.9233e-02, + 2.2263e-02, -3.4668e-02, -3.1952e-02, -9.9468e-04, -1.7334e-02, + -4.9072e-02, 3.5217e-02, -1.0796e-02, -3.8849e-02, 7.6485e-03, + -4.2725e-02, 2.1927e-02, 3.8567e-03, 2.5726e-02, 3.9864e-03, + 1.3306e-02, -5.2452e-03, -1.2312e-03, 2.5391e-02, -2.3438e-02, + -1.6449e-02, 2.6611e-02, -1.0582e-02, 1.8372e-02, 1.6861e-02, + 3.6346e-02, -1.3741e-02, -2.5558e-02, 2.8244e-02, 2.4597e-02, + 3.8166e-03, -7.8583e-03, 3.5858e-02, -8.9645e-03, -6.6414e-03, + -1.6956e-03, 2.5452e-02, -1.4294e-01, 1.0109e-02, 4.0703e-03, + -1.7380e-02, 3.1433e-02, 1.0391e-02, -9.6588e-03, -6.7520e-03, + 5.5122e-03, -4.1229e-02, 1.3943e-03, 1.5671e-02, 1.0292e-02, + 2.2476e-02, -1.0657e-01, 4.8004e-02, 6.3934e-03, 1.9196e-02, + 9.5581e-02, -5.0392e-03, -1.2474e-02, 1.2482e-02, 2.7191e-02, + 2.4734e-02, 2.2552e-02, 7.6233e-02, -3.1796e-03, 3.1677e-02, + -1.5175e-02, 3.6865e-02, 1.9592e-02, 1.7487e-02, 2.7100e-02, + 1.5900e-02, 3.4515e-02, 3.2196e-02, -1.9730e-02, -5.5199e-03, + 2.5864e-02, -5.7411e-03, 2.0187e-02, 1.2585e-01, 2.0905e-02, + -1.3206e-02, 3.8967e-03, -1.8936e-02, -6.6853e-04, 8.2169e-03, + 9.3689e-03, 2.9617e-02, 7.9498e-03, -2.7786e-02, 1.7517e-02, + -2.7084e-02, -2.9617e-02, -1.6098e-02, 1.5039e-01, 1.1663e-03, + 1.2169e-02, 6.3744e-03, 3.3020e-02, -2.6741e-03, 1.0414e-03, + 2.2446e-02, -2.2202e-03, -2.7435e-02, -3.9459e-02, 3.2562e-02, + 8.8440e-02, 1.1169e-02, 5.2429e-02, 4.8920e-02, 7.9956e-02, + 5.9204e-03, 1.1200e-02, -1.0880e-02, 2.0996e-02, 2.8183e-02, + 9.1028e-04, -2.5955e-02, 4.2391e-04, -1.9272e-02, 3.5065e-02, + -1.7261e-01, 3.3508e-02, 5.4626e-03, -6.3438e-03, -1.4801e-02, + -1.5701e-02, -5.0415e-02, 2.7298e-02, -1.1223e-02, 1.6312e-02, + 1.5152e-02, 4.5738e-03, -9.5062e-03, 4.8065e-02, 8.0322e-02, + -4.0344e-02, 7.9107e-04, 1.5160e-02, 4.5433e-03, -1.5297e-02, + 3.6850e-03, -3.1128e-02, 1.6113e-02, -5.2338e-03, 2.0542e-03, + -1.3107e-02, 6.0349e-03, -4.8492e-02, 1.1749e-02, -2.5696e-02, + -5.4626e-03, -4.1473e-02, -1.0239e-02, -2.1591e-03, -6.3095e-03, + -3.1586e-02, -6.0028e-02, -4.4441e-03, 5.7101e-05, -3.8635e-02, + 1.8982e-02, -2.5574e-02, -1.7452e-03, 2.1027e-02, 3.2935e-01, + -2.5833e-02, -5.3978e-03, -1.2657e-02, -2.7676e-03, -3.4576e-02, + -3.3661e-02, 1.0598e-04, -2.8944e-04 + ]])) + + SiGLIP_TEXT_EMBEDDING = np.squeeze(np.array([ + [ + -2.0950e-02, 1.7624e-02, 2.1629e-03, -3.2532e-02, 5.3062e-03, + 2.9190e-02, 9.8343e-03, 8.5068e-03, -2.5589e-02, 2.5234e-03, + -3.1738e-02, 4.4891e-02, 2.0340e-02, 2.6886e-02, 4.4975e-03, + 1.6861e-02, 2.9129e-02, 
4.5891e-03, -2.2766e-02, -9.7513e-04, + -1.3374e-02, 3.5522e-02, 1.6068e-02, -8.5678e-03, 1.8433e-02, + -4.3297e-03, 3.1796e-03, 2.1687e-03, 1.0920e-03, 2.6489e-02, + -1.6602e-02, -6.8130e-03, 1.5404e-02, -1.6602e-02, 1.8112e-02, + 1.2741e-02, -2.4948e-02, -1.3481e-02, 9.2392e-03, 1.1703e-02, + -3.1528e-03, 2.0737e-02, -1.4839e-03, 2.8801e-03, -2.0035e-02, + 5.1498e-04, 3.5645e-02, -9.9792e-03, 2.3041e-02, 2.1713e-02, + -2.8717e-02, -2.1530e-02, -2.1683e-02, 1.7212e-02, 1.3466e-02, + -5.3406e-04, 2.0218e-03, 4.4250e-03, 3.9917e-02, -1.4130e-02, + -2.5578e-03, -8.3160e-03, 2.2430e-02, 2.2263e-02, 1.2230e-02, + -1.1475e-02, 7.8812e-03, -1.7118e-04, -2.1271e-02, 2.8717e-02, + 2.0966e-02, 8.8120e-03, -2.0554e-02, -1.2035e-03, 6.8016e-03, + 3.4271e-02, -1.8585e-02, -2.8824e-02, 6.5689e-03, 2.2392e-03, + -1.5778e-02, -1.3603e-02, 1.3745e-04, -5.0323e-02, -1.5656e-02, + -2.8625e-02, 2.9434e-02, -1.4114e-02, -3.3447e-02, 1.9318e-02, + -1.2947e-02, 5.2910e-03, 3.6469e-03, 2.6016e-02, -1.0941e-02, + 1.8738e-02, 2.3453e-02, -6.4812e-03, 5.9479e-02, 3.0594e-02, + 1.8433e-02, -1.1955e-02, 2.2110e-02, 1.1513e-02, 1.4771e-02, + -1.4572e-02, 3.8239e-02, 2.0767e-02, 2.0264e-02, 6.5956e-03, + 1.1711e-03, -2.2919e-02, 3.4332e-02, 1.1246e-02, 3.5076e-03, + 1.8936e-02, 1.2108e-02, 1.6737e-03, -1.2985e-02, 1.8402e-02, + -1.4372e-03, -1.7685e-02, -6.4844e-01, 2.4094e-02, 1.7517e-02, + 8.0948e-03, -2.6337e-02, 4.3755e-03, -8.2703e-03, -7.4120e-03, + 3.5126e-02, 1.7075e-02, -1.7502e-02, 1.6586e-02, -3.1860e-02, + 5.7335e-03, -1.9577e-02, 1.1749e-02, -5.6229e-03, 2.9617e-02, + -6.2943e-03, 9.4452e-03, -3.3661e-02, 1.2886e-02, -6.1569e-03, + 9.6083e-04, 1.3685e-03, 1.4412e-02, -2.0203e-02, -2.0477e-02, + 8.9569e-03, -8.3389e-03, 2.4170e-02, 3.1948e-03, 1.0010e-02, + 2.8214e-02, -3.0533e-02, 2.2186e-02, 2.0814e-04, 7.1449e-03, + -8.0795e-03, -1.0147e-02, -2.2934e-02, -2.3746e-03, -9.3536e-03, + -1.2817e-02, -2.6886e-02, 2.6665e-03, -2.8519e-02, 3.7289e-03, + -7.3280e-03, 4.0070e-02, 1.2207e-02, -2.2537e-02, 8.1253e-03, + 8.1253e-03, -3.9307e-02, -9.0408e-03, -9.0561e-03, 5.8624e-02, + -1.9424e-02, -1.7365e-02, -1.8177e-03, -1.5457e-02, 2.0676e-02, + -5.9586e-03, 2.0462e-02, -6.7863e-03, -3.0174e-03, 1.5976e-02, + 1.4885e-02, -6.5994e-03, 2.4246e-02, -3.0502e-02, 1.6357e-02, + -1.5564e-03, -2.0859e-02, 2.1164e-02, 1.3565e-02, 4.1175e-04, + 1.3084e-03, 1.8204e-02, 4.1504e-03, -2.5833e-02, 1.5266e-02, + -1.1818e-02, 4.7379e-03, 1.1391e-02, 2.1347e-02, -5.9395e-03, + 1.3474e-02, -2.2018e-02, 4.0512e-03, 3.0197e-02, 1.5602e-03, + -5.1270e-03, 2.7847e-02, 1.6785e-02, 7.1068e-03, 2.6588e-03, + -8.9111e-03, 2.4475e-02, 2.7359e-02, 3.4607e-02, -2.3636e-02, + -2.4170e-02, 6.6872e-03, 7.9575e-03, -3.8483e-02, 3.3752e-02, + -1.6403e-02, 1.0437e-02, 7.6103e-03, -5.6915e-02, 1.2253e-02, + 6.7234e-05, 9.6359e-03, -4.8332e-03, 1.5068e-02, 2.7557e-02, + 3.3796e-05, -1.9569e-03, -5.1003e-03, -8.5449e-03, 3.0716e-02, + 3.1967e-03, 2.2793e-03, 2.8580e-02, -4.9324e-03, 1.6129e-02, + -1.2085e-02, 2.0630e-02, -1.8051e-02, -7.1812e-04, 2.3560e-02, + -5.5199e-03, 6.8604e-02, -1.3451e-02, -2.2690e-02, -5.0659e-02, + 1.4565e-02, -1.2791e-04, -1.5221e-02, -1.7166e-02, 2.2217e-02, + 5.1651e-03, -2.2049e-02, -2.7802e-02, 6.6757e-03, -3.0151e-02, + -3.9825e-03, -1.8127e-02, -1.2531e-03, -9.6588e-03, -1.0483e-02, + 2.0767e-02, -3.4607e-02, -1.9150e-02, 1.4084e-02, -2.4071e-03, + -3.2104e-02, -3.5248e-02, -4.5837e-02, 1.5961e-02, -2.5436e-02, + -2.9938e-02, -2.1591e-03, -2.0416e-02, 1.6388e-02, -6.8092e-03, + 1.6556e-02, -3.0251e-03, 9.6970e-03, 
-7.1297e-03, 1.9928e-02, + -5.6122e-02, 9.2649e-04, -2.1076e-03, -2.0752e-02, 2.8648e-03, + -2.1118e-02, -4.0741e-03, 4.4128e-02, 5.1575e-03, -9.3002e-03, + 2.3956e-02, -6.8626e-03, -7.8049e-03, -2.1439e-02, -1.3268e-02, + 3.4790e-02, 4.1046e-03, -2.1591e-02, 1.7347e-03, 1.7975e-02, + 1.4465e-02, -6.4163e-03, -3.9581e-02, 7.9880e-03, -1.8845e-02, + -3.3356e-02, 3.3966e-02, -2.8015e-02, -3.8239e-02, -3.4142e-03, + -1.1772e-02, 2.0313e-03, 1.6907e-02, 5.0354e-03, -2.1545e-02, + -7.8354e-03, -1.0483e-02, 1.5320e-02, 1.0460e-02, -3.6194e-02, + 5.2032e-02, -1.6113e-02, -1.4412e-02, -1.6159e-02, 1.8631e-02, + -3.9177e-03, -9.9564e-03, 2.9770e-02, -2.1820e-03, -4.3152e-02, + -4.3152e-02, -1.7303e-02, -4.1122e-03, -9.2087e-03, 6.1264e-03, + 2.7359e-02, -2.8305e-02, 1.4641e-02, 1.3832e-02, -4.2038e-03, + 1.9577e-02, 3.2745e-02, -8.6441e-03, -3.4332e-02, 9.0561e-03, + -4.6692e-03, -8.7280e-03, 4.0550e-03, 1.5579e-02, -1.1169e-02, + -7.3385e-04, -7.6408e-03, -1.9043e-02, -9.0942e-03, 1.7250e-04, + 2.1408e-02, 3.0609e-02, 3.4313e-03, -5.5237e-03, 1.0376e-02, + -1.5579e-02, -5.3902e-03, -2.5826e-03, -4.1504e-03, -6.5369e-02, + -1.5686e-02, -1.6098e-02, 7.0114e-03, -2.9190e-02, 1.2917e-02, + 1.8524e-02, 7.0238e-04, 1.3397e-02, 8.9798e-03, 4.6387e-02, + -5.6732e-02, 1.7517e-02, -1.9775e-02, -2.5986e-02, 3.8330e-02, + 4.0070e-02, 3.2349e-02, 2.1194e-02, -1.8448e-02, 1.4687e-02, + 1.3533e-03, 2.5864e-02, 5.3749e-03, -1.9638e-02, -3.4698e-02, + 1.0796e-02, 1.7319e-02, -1.3618e-02, -4.1107e-02, -8.7967e-03, + -3.4180e-02, -7.2327e-03, -1.1932e-02, -4.8523e-02, 3.1830e-02, + 8.3694e-03, -5.6992e-03, 1.1078e-02, -2.1515e-02, -1.9836e-02, + -9.1400e-03, 9.2545e-03, -1.5701e-02, -1.0826e-02, 3.2227e-02, + 6.6710e-04, 1.2260e-02, 6.6833e-03, -2.6962e-02, -1.5688e-03, + 1.4389e-02, 6.1989e-04, 2.5034e-04, -4.1840e-02, -6.7101e-03, + 1.3901e-02, -1.7105e-02, -5.0163e-03, 4.2999e-02, -1.3351e-02, + 1.4336e-02, -8.5678e-03, 1.5087e-03, 8.1329e-03, -2.6382e-02, + -2.6093e-02, -6.9809e-03, 3.8696e-02, 1.1848e-02, 3.2623e-02, + -1.5221e-02, -2.5421e-02, 3.8948e-03, 2.1988e-02, 3.4332e-03, + -2.9411e-03, 3.5339e-02, 1.4977e-02, 2.1687e-03, 2.7557e-02, + -2.3941e-02, -4.4899e-03, -3.0960e-02, 1.9485e-02, 8.6136e-03, + 1.5274e-02, 1.0406e-02, -7.5607e-03, -1.2009e-02, 5.6496e-03, + 1.3771e-02, 4.3152e-02, -3.3932e-03, 2.2697e-03, 2.6245e-02, + 1.0765e-02, -2.5253e-02, 2.5314e-02, 5.6229e-03, 1.1154e-02, + 5.0926e-03, -6.1371e-02, 3.1250e-02, -7.9880e-03, -1.9089e-02, + 2.1133e-03, -6.1874e-03, -1.8539e-02, 1.7487e-02, 1.0849e-02, + -1.2909e-02, 1.4381e-02, 2.4704e-02, -1.7670e-02, -4.2725e-03, + -3.6133e-02, -3.5152e-03, -6.1274e-04, -2.0462e-02, 3.9337e-02, + -5.0583e-03, 7.3090e-03, -7.9803e-03, 1.7517e-02, 1.0918e-02, + -3.0502e-02, -4.2999e-02, -3.3112e-02, -2.5940e-02, 1.7212e-02, + 3.6278e-03, -1.0330e-02, 4.5586e-03, 1.1345e-02, -5.2719e-03, + 1.2871e-02, 4.4586e-02, 1.3603e-02, 2.3026e-02, -1.5915e-02, + -4.1008e-03, -2.2644e-02, 5.1956e-03, -4.0779e-03, -3.0441e-02, + -2.3300e-02, -4.3060e-02, -2.1851e-02, 1.7136e-02, -1.6525e-02, + -2.5681e-02, 4.9629e-03, 1.3176e-02, 2.0542e-03, 1.2627e-02, + -5.4817e-03, 4.3607e-04, 3.7018e-02, -1.1848e-02, -1.6479e-02, + 2.3899e-03, 1.3931e-02, -5.4131e-03, -4.9820e-03, -9.3689e-03, + -1.4229e-02, -7.8506e-03, -5.2338e-02, -4.3488e-03, -3.2257e-02, + -3.1311e-02, 1.3397e-02, -2.4292e-02, 3.1395e-03, -1.9363e-02, + 1.6022e-02, 1.8890e-02, 3.8834e-03, -1.5556e-02, 2.6245e-02, + -1.5945e-03, -1.9211e-02, 2.9411e-03, -2.5360e-02, -1.1337e-02, + 2.1706e-03, 1.2909e-02, -9.5367e-03, 
1.2657e-02, 1.5495e-02, + -2.3758e-02, 1.0422e-02, 8.3847e-03, 3.6163e-02, -9.2649e-04, + 1.5564e-03, 1.3855e-02, -3.0487e-02, -1.6983e-02, 3.0640e-02, + -1.8448e-02, 1.6632e-02, 9.1553e-03, 2.3483e-02, 9.8648e-03, + -5.6992e-03, 1.4076e-02, 1.4633e-02, -7.8812e-03, -8.7357e-03, + -1.6998e-02, -1.0765e-02, -2.5375e-02, -1.3428e-02, -2.7435e-02, + 1.9669e-02, 2.9999e-02, -2.1042e-02, -8.4915e-03, 3.7727e-03, + 3.0396e-02, 8.8120e-03, -2.0370e-03, -2.7054e-02, 2.3071e-02, + 1.3878e-02, 4.1924e-03, -1.1360e-02, -1.1396e-04, -1.6510e-02, + 1.1536e-02, 3.2558e-03, -6.0158e-03, 1.9043e-02, -7.7553e-03, + 4.7035e-03, -8.8272e-03, 5.4893e-03, 8.9264e-03, 1.0490e-02, + 1.5579e-02, 1.7750e-04, 3.0975e-03, 3.4943e-02, 3.6987e-02, + 3.3386e-02, -2.6688e-02, 4.7340e-03, -4.0222e-02, 6.8855e-04, + 7.3357e-03, -4.5815e-03, -2.9877e-02, 1.2505e-02, 8.9264e-04, + 5.0732e-01, -3.5820e-03, 3.1799e-02, -8.5297e-03, -1.5625e-02, + 1.5205e-02, 7.5111e-03, 8.1863e-03, -4.0009e-02, -1.0246e-02, + -1.2306e-02, 2.1912e-02, 4.4861e-03, -3.4943e-02, 7.6714e-03, + -6.0043e-03, 1.5701e-02, 2.7664e-02, 1.0424e-03, 2.0050e-02, + 1.1124e-02, 3.4424e-02, 2.7390e-02, -2.7065e-03, 1.9745e-02, + -6.1760e-03, 1.5144e-02, -1.0460e-02, 3.9429e-02, -3.6774e-03, + 3.4576e-02, -2.8992e-03, -1.6037e-02, 4.3793e-03, 2.1667e-02, + -8.7967e-03, 1.9424e-02, 1.5205e-02, 1.8661e-02, -2.0050e-02, + -1.6449e-02, -2.2842e-02, -1.5106e-03, 1.5888e-03, 2.3956e-02, + 1.2222e-02, 2.8961e-02, 1.2755e-04, -2.0660e-02, -5.6801e-03, + -3.5095e-02, 2.1439e-03, -5.1758e-02, 2.1496e-03, -9.6512e-03, + 3.2074e-02, -1.1459e-02, -7.7019e-03, 2.4471e-03, 7.5483e-04, + 1.7223e-03, 3.7537e-02, 8.6594e-03, 1.4252e-02, 9.6741e-03, + 3.7811e-02, -2.4261e-02, 1.1055e-02, 2.3956e-02, 8.1711e-03, + 8.9417e-03, 1.9424e-02, 1.7441e-02, 1.1650e-02, -7.4863e-05, + -9.2010e-03, -1.1192e-02, -1.6449e-02, -2.8473e-02, -2.7435e-02, + -2.9938e-02, -5.6419e-03, -1.5327e-02, -1.7914e-02, 1.2459e-02, + -2.0706e-02, -3.4370e-03, -1.3199e-02, -5.9662e-02, 2.3743e-02, + -3.3081e-02, -8.3923e-03, -8.6441e-03, -2.8870e-02, 3.2623e-02, + 2.6642e-02, -3.0807e-02, -2.4548e-03, -4.6753e-02, 1.9241e-02, + 2.7237e-02, 5.4703e-03, -6.4201e-03, -1.2962e-02, -3.8971e-02, + 3.2101e-03, 2.6230e-02, -3.1952e-02, -2.8824e-02, 1.3809e-02, + 5.5634e-02, -7.3814e-03, -1.3161e-02, -1.1284e-02, -1.5556e-02, + -1.0925e-02, -1.4214e-02, 1.6251e-02, 3.2501e-02, -2.5070e-02, + 1.1589e-02, -2.8667e-03, 7.2556e-03, 7.4120e-03, 8.1940e-03, + 2.4475e-02, 3.7994e-02, 3.8452e-02, -6.1913e-03, -3.5739e-04, + -3.1860e-02, -1.2660e-04, -3.9005e-03 + ] + ])) + + def setUp(self): + clear_loaded_models() + + def tearDown(self): + clear_loaded_models() + + def test_MarqoFashionSigLIPModel_load(self): + """Test the load method of MarqoFashionCLIPModel.""" + + model_tag = "open_clip/ViT-B-16-SigLIP/marqo-fashionSigLIP" + model_properties = OPEN_CLIP_MODEL_PROPERTIES[model_tag] + model = OPEN_CLIP(model_tag, device="cpu", model_properties=model_properties) + + model.load() + + self.assertIsNotNone(model.model) + self.assertIsNotNone(model.tokenizer) + self.assertIsNotNone(model.preprocess) + + marqo_image_embeddings = np.squeeze(np.array(model.encode(self.IMAGE_CONTENT))) + marqo_text_embeddings = np.squeeze(np.array(model.encode(self.TEXT_CONTENT))) + + image_difference = np.linalg.norm(marqo_image_embeddings - self.SiGLIP_IMAGE_EMBEDDING) / len( + marqo_image_embeddings) + text_difference = np.linalg.norm(marqo_text_embeddings - self.SiGLIP_TEXT_EMBEDDING) / len( + marqo_text_embeddings) + + 
self.assertLess(image_difference, 1e-4, f"Image embeddings are not close enough. "
+                                                f"The average difference is: {image_difference}")
+        self.assertLess(text_difference, 1e-4, f"Text embeddings are not close enough. "
+                                               f"The average difference is: {text_difference}")
+
+    def test_MarqoFashionCLIPModel_load(self):
+        """Test the load method of MarqoFashionCLIPModel."""
+
+        model_tag = "open_clip/ViT-B-16/marqo-fashionCLIP"
+        model_properties = OPEN_CLIP_MODEL_PROPERTIES[model_tag]
+        model = OPEN_CLIP(model_tag, device="cpu", model_properties=model_properties)
+
+        model.load()
+
+        self.assertIsNotNone(model.model)
+        self.assertIsNotNone(model.tokenizer)
+        self.assertIsNotNone(model.preprocess)
+
+        marqo_image_embeddings = np.squeeze(np.array(model.encode(self.IMAGE_CONTENT)))
+        marqo_text_embeddings = np.squeeze(np.array(model.encode(self.TEXT_CONTENT)))
+
+        image_difference = (np.linalg.norm(marqo_image_embeddings - self.FASHIONCLIP_IMAGE_EMBEDDING)
+                            / len(marqo_image_embeddings))
+        text_difference = (np.linalg.norm(marqo_text_embeddings - self.FASHIONCLIP_TEXT_EMBEDDING)
+                           / len(marqo_text_embeddings))
+
+        self.assertLess(image_difference, 1e-4, f"Image embeddings are not close enough. "
+                                                f"The average difference is: {image_difference}")
+        self.assertLess(text_difference, 1e-4, f"Text embeddings are not close enough. "
+                                               f"The average difference is: {text_difference}")
diff --git a/tests/s2_inference/open_clip_models/test_open_clip_model_load.py b/tests/s2_inference/open_clip_models/test_open_clip_model_load.py
new file mode 100644
index 000000000..8c88278da
--- /dev/null
+++ b/tests/s2_inference/open_clip_models/test_open_clip_model_load.py
@@ -0,0 +1,149 @@
+from unittest import TestCase
+from unittest.mock import patch, MagicMock
+
+from marqo.s2_inference.clip_utils import OPEN_CLIP
+from marqo.s2_inference.configs import ModelCache
+from marqo.s2_inference.model_registry import _get_open_clip_properties
+
+OPEN_CLIP_MODEL_PROPERTIES = _get_open_clip_properties()
+
+
+class TestOpenCLIPModelLoad(TestCase):
+    """A test suite for loading OpenCLIP models.
+
+    The model loading logic for OpenCLIP models in Marqo can be categorized into the following steps, in order:
+    1. If the `url` or `modelLocation` is provided in the model properties, download the model from the specified
+    location and load the checkpoint.
+    2. If the `name` of the model properties is provided, and it starts with `hf-hub`, load the model from the
+    Hugging Face Hub.
+    3. Otherwise, load the model as a registered model in the model registry.
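+
+    A minimal sketch of `model_properties` for each path, reusing the hypothetical
+    values from the tests below (none of these point at real models):
+
+        # 1. Load from a checkpoint: a `url` (or `modelLocation`) is present.
+        {"name": "ViT-B-32", "type": "open_clip",
+         "url": "https://openclipart.org/download/12345/my_test_model.pt"}
+
+        # 2. Load from the Hugging Face Hub: `name` starts with `hf-hub:`.
+        {"name": "hf-hub:my_test_hub", "type": "open_clip"}
+
+        # 3. Load a registered model: `name` is a tag from the model registry.
+        {"name": "open_clip/ViT-B-32/laion5b_s13b_b90k", "type": "open_clip"}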
+    """
+
+    def test_load_OpenCLIPModelFromCheckPointMethod_success(self):
+        """Test that the checkpoint loading method is called when a url is provided in the model properties."""
+        model_name = "my_test_model"
+        model_properties = {
+            "name": "ViT-B-32",
+            "url": "https://openclipart.org/download/12345/my_test_model.pt",
+            "type": "open_clip"
+        }
+
+        with patch("marqo.s2_inference.clip_utils.OPEN_CLIP._load_model_and_image_preprocessor_from_checkpoint",
+                   return_value=(MagicMock(), None)) as mock_load_method:
+            with patch("marqo.s2_inference.clip_utils.OPEN_CLIP._load_tokenizer_from_checkpoint",
+                       return_value=None) as mock_load_tokenizer:
+                with patch.object(MagicMock(), 'eval', return_value=None) as mock_eval:
+                    model = OPEN_CLIP(model_name, model_properties=model_properties, device="cpu")
+                    model.load()
+                    mock_load_method.assert_called_once()
+                    mock_load_tokenizer.assert_called_once()
+
+    def test_load_OpenCLIPModelFromCheckPointParameters_success(self):
+        """Test correct parameters are passed to the OpenCLIP loading from checkpoint method."""
+        model_tag = "my_test_model"
+        model_properties = {
+            "name": "ViT-B-108",
+            "url": "https://openclipart.org/download/12345/my_test_model.pt",
+            "type": "open_clip"
+        }
+        with patch("marqo.s2_inference.clip_utils.open_clip.create_model", return_value=MagicMock()) \
+                as mock_create_model:
+            with patch("marqo.s2_inference.clip_utils.open_clip.get_tokenizer", return_value=MagicMock()) \
+                    as mock_tokenizer:
+                with patch("marqo.s2_inference.clip_utils.download_model", return_value="my_test_model.pt"):
+                    with patch.object(MagicMock(), 'eval', return_value=None) as mock_eval:
+                        model = OPEN_CLIP(model_tag, model_properties=model_properties, device="cpu")
+                        model.load()
+                        mock_create_model.assert_called_once_with(
+                            model_name="ViT-B-108",
+                            jit=False,
+                            pretrained="my_test_model.pt",
+                            precision="fp32", device="cpu",
+                            cache_dir=ModelCache.clip_cache_path
+                        )
+                        mock_tokenizer.assert_called_once_with("ViT-B-108")
+                        preprocess_config = model.preprocess_config
+                        self.assertEqual(224, preprocess_config.size)
+                        self.assertEqual("RGB", preprocess_config.mode)
+                        self.assertEqual((0.48145466, 0.4578275, 0.40821073), preprocess_config.mean)
+                        self.assertEqual((0.26862954, 0.26130258, 0.27577711), preprocess_config.std)
+                        self.assertEqual("bicubic", preprocess_config.interpolation)
+                        self.assertEqual("shortest", preprocess_config.resize_mode)
+                        self.assertEqual(0, preprocess_config.fill_color)
+
+    def test_load_OpenCLIPModelFromCheckPointPreprocessConfig(self):
+        """Test that image preprocessor settings in the model properties override the defaults when loading from a checkpoint."""
+        model_tag = "my_test_model"
+        model_properties = {
+            "name": "test-siglip",
+            "url": "https://openclipart.org/download/12345/my_test_model.pt",
+            "type": "open_clip",
+            "image_preprocessor": "SigLIP",
+            "size": 322  # Override the default size 224
+        }
+        with patch("marqo.s2_inference.clip_utils.open_clip.create_model", return_value=MagicMock()) \
+                as mock_create_model:
+            with patch("marqo.s2_inference.clip_utils.open_clip.get_tokenizer", return_value=MagicMock()) \
+                    as mock_tokenizer:
+                with patch("marqo.s2_inference.clip_utils.download_model", return_value="my_test_model.pt"):
+                    with patch.object(MagicMock(), 'eval', return_value=None) as mock_eval:
+                        model = OPEN_CLIP(model_tag, model_properties=model_properties, device="cpu")
+                        model.load()
+                        mock_create_model.assert_called_once_with(
+                            model_name="test-siglip",
+                            jit=False,
+                            pretrained="my_test_model.pt",
+                            precision="fp32", device="cpu",
+
cache_dir=ModelCache.clip_cache_path
+                        )
+                        mock_tokenizer.assert_called_once_with("test-siglip")
+                        preprocess_config = model.preprocess_config
+                        self.assertEqual(322, preprocess_config.size)
+                        self.assertEqual("RGB", preprocess_config.mode)
+                        self.assertEqual((0.5, 0.5, 0.5), preprocess_config.mean)
+                        self.assertEqual((0.5, 0.5, 0.5), preprocess_config.std)
+                        self.assertEqual("bicubic", preprocess_config.interpolation)
+                        self.assertEqual("squash", preprocess_config.resize_mode)
+                        self.assertEqual(0, preprocess_config.fill_color)
+
+    def test_open_clip_load_fromHuggingFaceHub_success(self):
+        model_tag = "my_test_model"
+        model_properties = {
+            "name": "hf-hub:my_test_hub",
+            "type": "open_clip",
+        }
+        with patch("marqo.s2_inference.clip_utils.open_clip.create_model_and_transforms",
+                   return_value=(MagicMock(), MagicMock(), MagicMock())) \
+                as mock_create_model:
+            with patch("marqo.s2_inference.clip_utils.open_clip.get_tokenizer", return_value=MagicMock()) \
+                    as mock_tokenizer:
+                with patch.object(MagicMock(), 'eval', return_value=None) as mock_eval:
+                    model = OPEN_CLIP(model_tag, model_properties=model_properties, device="cpu")
+                    model.load()
+                    mock_create_model.assert_called_once_with(
+                        model_name="hf-hub:my_test_hub",
+                        device="cpu",
+                        cache_dir=ModelCache.clip_cache_path
+                    )
+                    mock_tokenizer.assert_called_once_with("hf-hub:my_test_hub")
+
+    def test_open_clip_load_fromMarqoModelRegistry_success(self):
+        model_tag = "open_clip/ViT-B-32/laion5b_s13b_b90k"
+        model_properties = {
+            "name": "open_clip/ViT-B-32/laion5b_s13b_b90k",
+            "type": "open_clip",
+        }
+        with patch("marqo.s2_inference.clip_utils.open_clip.create_model_and_transforms",
+                   return_value=(MagicMock(), MagicMock(), MagicMock())) \
+                as mock_create_model:
+            with patch("marqo.s2_inference.clip_utils.open_clip.get_tokenizer", return_value=MagicMock()) \
+                    as mock_tokenizer:
+                with patch.object(MagicMock(), 'eval', return_value=None) as mock_eval:
+                    model = OPEN_CLIP(model_tag, model_properties=model_properties, device="cpu")
+                    model.load()
+                    mock_create_model.assert_called_once_with(
+                        model_name="ViT-B-32",
+                        pretrained="laion5b_s13b_b90k",
+                        device="cpu",
+                        cache_dir=ModelCache.clip_cache_path
+                    )
+                    mock_tokenizer.assert_called_once_with("ViT-B-32")

From a8c9bf5f6818b4c74504f34f0de3fda1a48bd6b9 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Aug 2024 13:22:56 +1000
Subject: [PATCH 03/63] Upgrade requirements.txt

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 798313fa8..35ed9bc98 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -21,7 +21,7 @@ ARG COMMITHASH
 WORKDIR /app
 
 COPY requirements.txt requirements.txt
-RUN pip3 install --no-cache-dir -r requirements.txt
+RUN pip3 install --no-cache-dir --upgrade -r requirements.txt
 RUN rm requirements.txt
 
 # Stage 3: Final stage that builds on the base image

From 0d40b581b2e27624b0f519d6edc93e2f78facee2 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Aug 2024 13:32:52 +1000
Subject: [PATCH 04/63] Upgrade requirements.txt

---
 requirements.dev.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.dev.txt b/requirements.dev.txt
index f93bb4f0a..7e46a1743 100644
--- a/requirements.dev.txt
+++ b/requirements.dev.txt
@@ -29,7 +29,7 @@ Pillow==9.3.0
 numpy==1.23.4
 validators==0.20.0
 sentence-transformers==2.2.2
-open_clip_torch==2.24.0
+open_clip_torch==2.26.1
 clip-marqo==1.0.2
 onnx
 protobuf

From a2bef12ec45c231b93b51af435ce66d26dbb160f Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 21 Aug 2024
16:15:09 +1000 Subject: [PATCH 05/63] Remove max sequence length --- .../core/inference/models/abstract_clip_model.py | 14 ++++++++------ src/marqo/s2_inference/clip_utils.py | 2 +- src/marqo/s2_inference/s2_inference.py | 8 +++++--- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/marqo/core/inference/models/abstract_clip_model.py b/src/marqo/core/inference/models/abstract_clip_model.py index 2be6f6253..b60646004 100644 --- a/src/marqo/core/inference/models/abstract_clip_model.py +++ b/src/marqo/core/inference/models/abstract_clip_model.py @@ -27,12 +27,14 @@ class AbstractCLIPModel(AbstractModel): preprocess: The preprocessing pipeline for the model, initialized to `None` and to be set by subclasses. """ - def __init__(self, model_type: str, - device: Optional[str] = None, - embedding_dim: Optional[int] = None, - truncate: bool = True, - model_properties: Optional[dict] = None, - **kwargs): + def __init__( + self, model_type: str, + device: Optional[str] = None, + embedding_dim: Optional[int] = None, + truncate: bool = True, + model_properties: Optional[dict] = None, + **kwargs + ): """Instantiate the abstract CLIP model. Args: diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 6eaffd705..0e5ddef01 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -680,7 +680,7 @@ def _load_tokenizer_from_checkpoint(self) -> Callable: return open_clip.get_tokenizer(self.model_properties.name) else: logger.info(f"Custom HFTokenizer is provided. Loading...") - return HFTokenizer(self.model_properties.tokenizer) + return HFTokenizer(self.model_properties.tokenizer, ) def _load_tokenizer_from_hf_repo(self) -> Callable: return open_clip.get_tokenizer(self.model_properties.name) diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index c1b1ceb11..a1dd77462 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -505,10 +505,12 @@ def _load_model( print(f"loading for: model_name={model_name} and properties={model_properties}") loader = _get_model_loader(model_properties.get('name', None), model_properties) - max_sequence_length = model_properties.get('tokens', get_default_seq_length()) model = loader( - model_properties.get('name', None), device=device, embedding_dim=model_properties['dimensions'], - max_seq_length=max_sequence_length, model_properties=model_properties, model_auth=model_auth + model_properties.get('name', None), + device=device, + embedding_dim=model_properties['dimensions'], + model_properties=model_properties, + model_auth=model_auth ) model.load() return model From c3fd6f5b18b2252c4b9758a4d0f5863c89da4093 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 21 Aug 2024 16:53:29 +1000 Subject: [PATCH 06/63] Remove outdated open_clip tests --- tests/s2_inference/test_clip_utils.py | 120 +++++++++++++------------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/tests/s2_inference/test_clip_utils.py b/tests/s2_inference/test_clip_utils.py index 6a93db71a..f005a66d8 100644 --- a/tests/s2_inference/test_clip_utils.py +++ b/tests/s2_inference/test_clip_utils.py @@ -211,63 +211,63 @@ def test_multilingual_clip_with_no_device(self): except InternalError as e: pass - -class TestOpenClipLoad(unittest.TestCase): - - @patch('marqo.s2_inference.clip_utils.open_clip.create_model_and_transforms', - return_value=(mock.Mock(), mock.Mock(), mock.Mock())) - def test_load_without_model_properties(self, 
mock_open_clip_create_model_and_transforms): - """By default laion400m_e32 is loaded...""" - open_clip = OPEN_CLIP(device="cpu") - open_clip.load() - mock_open_clip_create_model_and_transforms.assert_called_once_with( - 'ViT-B-32-quickgelu', pretrained='laion400m_e32', - device='cpu', jit=False, cache_dir=ModelCache.clip_cache_path) - - @patch('marqo.s2_inference.clip_utils.open_clip.create_model_and_transforms', - return_value=(mock.Mock(), mock.Mock(), mock.Mock())) - @patch('os.path.isfile', return_value=True) - def test_load_with_local_file(self, mock_isfile, mock_open_clip_create_model_and_transforms): - model_path = 'localfile.pth' - open_clip = OPEN_CLIP(model_properties={'localpath': model_path}, device="cpu") - open_clip.load() - mock_open_clip_create_model_and_transforms.assert_called_once_with( - model_name=open_clip.model_name, jit=False, pretrained=model_path, - precision='fp32', image_mean=None, image_std=None, - device='cpu', cache_dir=ModelCache.clip_cache_path) - - @patch('marqo.s2_inference.clip_utils.open_clip.create_model_and_transforms', - return_value=(mock.Mock(), mock.Mock(), mock.Mock())) - @patch('validators.url', return_value=True) - @patch('marqo.s2_inference.clip_utils.download_model', return_value='model.pth') - def test_load_with_url(self, mock_download_model, mock_validators_url, mock_open_clip_create_model_and_transforms): - model_url = 'http://model.com/model.pth' - open_clip = OPEN_CLIP(model_properties={'url': model_url}, device="cpu") - open_clip.load() - mock_download_model.assert_called_once_with(url=model_url) - mock_open_clip_create_model_and_transforms.assert_called_once_with( - model_name=open_clip.model_name, jit=False, pretrained='model.pth', precision='fp32', - image_mean=None, image_std=None, device='cpu', cache_dir=ModelCache.clip_cache_path) - - @patch('marqo.s2_inference.clip_utils.open_clip.create_model_and_transforms', - return_value=(mock.Mock(), mock.Mock(), mock.Mock())) - @patch('marqo.s2_inference.clip_utils.CLIP._download_from_repo', - return_value='model.pth') - def test_load_with_model_location(self, mock_download_from_repo, mock_open_clip_create_model_and_transforms): - open_clip = OPEN_CLIP(model_properties={ - ModelProperties.model_location: ModelLocation( - auth_required=True, hf=HfModelLocation(repo_id='someId', filename='some_file.pt')).dict()}, device="cpu") - open_clip.load() - mock_download_from_repo.assert_called_once() - mock_open_clip_create_model_and_transforms.assert_called_once_with( - model_name=open_clip.model_name, jit=False, pretrained='model.pth', precision='fp32', - image_mean=None, image_std=None, device='cpu', cache_dir=ModelCache.clip_cache_path) - - def test_open_clip_with_no_device(self): - # Should fail, raising internal error - try: - model_url = 'http://example.com/model.pth' - clip = OPEN_CLIP(model_properties={'url': model_url}) - raise AssertionError - except InternalError as e: - pass +# +# class TestOpenClipLoad(unittest.TestCase): +# +# @patch('marqo.s2_inference.clip_utils.open_clip.create_model_and_transforms', +# return_value=(mock.Mock(), mock.Mock(), mock.Mock())) +# def test_load_without_model_properties(self, mock_open_clip_create_model_and_transforms): +# """By default laion400m_e32 is loaded...""" +# open_clip = OPEN_CLIP(device="cpu") +# open_clip.load() +# mock_open_clip_create_model_and_transforms.assert_called_once_with( +# 'ViT-B-32-quickgelu', pretrained='laion400m_e32', +# device='cpu', jit=False, cache_dir=ModelCache.clip_cache_path) +# +# 
@patch('marqo.s2_inference.clip_utils.open_clip.create_model_and_transforms', +# return_value=(mock.Mock(), mock.Mock(), mock.Mock())) +# @patch('os.path.isfile', return_value=True) +# def test_load_with_local_file(self, mock_isfile, mock_open_clip_create_model_and_transforms): +# model_path = 'localfile.pth' +# open_clip = OPEN_CLIP(model_properties={'localpath': model_path}, device="cpu") +# open_clip.load() +# mock_open_clip_create_model_and_transforms.assert_called_once_with( +# model_name=open_clip.model_name, jit=False, pretrained=model_path, +# precision='fp32', image_mean=None, image_std=None, +# device='cpu', cache_dir=ModelCache.clip_cache_path) +# +# @patch('marqo.s2_inference.clip_utils.open_clip.create_model_and_transforms', +# return_value=(mock.Mock(), mock.Mock(), mock.Mock())) +# @patch('validators.url', return_value=True) +# @patch('marqo.s2_inference.clip_utils.download_model', return_value='model.pth') +# def test_load_with_url(self, mock_download_model, mock_validators_url, mock_open_clip_create_model_and_transforms): +# model_url = 'http://model.com/model.pth' +# open_clip = OPEN_CLIP(model_properties={'url': model_url}, device="cpu") +# open_clip.load() +# mock_download_model.assert_called_once_with(url=model_url) +# mock_open_clip_create_model_and_transforms.assert_called_once_with( +# model_name=open_clip.model_name, jit=False, pretrained='model.pth', precision='fp32', +# image_mean=None, image_std=None, device='cpu', cache_dir=ModelCache.clip_cache_path) +# +# @patch('marqo.s2_inference.clip_utils.open_clip.create_model_and_transforms', +# return_value=(mock.Mock(), mock.Mock(), mock.Mock())) +# @patch('marqo.s2_inference.clip_utils.CLIP._download_from_repo', +# return_value='model.pth') +# def test_load_with_model_location(self, mock_download_from_repo, mock_open_clip_create_model_and_transforms): +# open_clip = OPEN_CLIP(model_properties={ +# ModelProperties.model_location: ModelLocation( +# auth_required=True, hf=HfModelLocation(repo_id='someId', filename='some_file.pt')).dict()}, device="cpu") +# open_clip.load() +# mock_download_from_repo.assert_called_once() +# mock_open_clip_create_model_and_transforms.assert_called_once_with( +# model_name=open_clip.model_name, jit=False, pretrained='model.pth', precision='fp32', +# image_mean=None, image_std=None, device='cpu', cache_dir=ModelCache.clip_cache_path) +# +# def test_open_clip_with_no_device(self): +# # Should fail, raising internal error +# try: +# model_url = 'http://example.com/model.pth' +# clip = OPEN_CLIP(model_properties={'url': model_url}) +# raise AssertionError +# except InternalError as e: +# pass From 0cb12e99da64c9905f1a3c8820d5da90bd0bf7e4 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 21 Aug 2024 18:06:58 +1000 Subject: [PATCH 07/63] Fix unit tests error message --- .../test_corrupt_file_error_handling.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/s2_inference/model_downloading/test_corrupt_file_error_handling.py b/tests/s2_inference/model_downloading/test_corrupt_file_error_handling.py index 59b690a07..eb7fe776c 100644 --- a/tests/s2_inference/model_downloading/test_corrupt_file_error_handling.py +++ b/tests/s2_inference/model_downloading/test_corrupt_file_error_handling.py @@ -57,7 +57,7 @@ def setUp(self): self.dummpy_corrupted_file = "/path/to/corrupted/file.pt" - @patch('open_clip.create_model_and_transforms', autospec=True) + @patch('open_clip.create_model', autospec=True) @patch('os.remove', autospec=True) def test_corrupted_file_handling(self, 
mock_os_remove, mock_create_model_and_transforms):
         # Setup
@@ -73,7 +73,7 @@ def test_corrupted_file_handling(self, mock_os_remove, mock_create_model_and_tra
             # Reset necessary mock
             mock_os_remove.reset_mock()
 
-    @patch('open_clip.create_model_and_transforms', autospec=True)
+    @patch('open_clip.create_model', autospec=True)
     @patch('os.remove', autospec=True)
     def test_file_removal_failure_handling(self, mock_os_remove, mock_create_model_and_transforms):
         # Setup
@@ -92,7 +92,7 @@ def test_file_removal_failure_handling(self, mock_os_remove, mock_create_model_a
             # Reset the mock
             mock_os_remove.reset_mock()
 
-    @patch('open_clip.create_model_and_transforms', autospec=True)
+    @patch('open_clip.create_model', autospec=True)
     @patch('os.remove', autospec=True)
     def test_other_errors_handling(self, mock_os_remove, mock_create_model_and_transforms):
         # Setup
@@ -105,7 +105,7 @@ def test_other_errors_handling(self, mock_os_remove, mock_create_model_and_trans
         self.assertIn("Marqo encountered an error when loading custom open_clip model", str(context.exception))
         mock_os_remove.assert_not_called()
 
-    @patch('open_clip.create_model_and_transforms', autospec=True)
+    @patch('open_clip.create_model', autospec=True)
     @patch('os.remove', autospec=True)
     def test_load_clip_into_open_clip_errors_handling(self, mock_os_remove, mock_create_model_and_transforms):
         # Setup
@@ -117,7 +117,7 @@ def test_load_clip_into_open_clip_errors_handling(self, mock_os_remove, mock_cre
         with self.assertRaises(InvalidModelPropertiesError) as context:
             _ = _load_model(**self.load_parameters, model_properties=model_properties)
         self.assertIn(
-            "You may have tried to load a `clip` model even though `model_properties['type']` is set to 'open_clip'",
+            "You may have tried to load a clip model even though model_properties['type'] is set to 'open_clip'",
             str(context.exception))
         mock_os_remove.assert_not_called()
 
@@ -132,7 +132,7 @@ def test_load_clip_model_into_open_clip_no_mock(self):
             _ = _load_model(**self.load_parameters, model_properties=model_properties)
 
         self.assertIn(
-            "You may have tried to load a `clip` model even though `model_properties['type']` is set to 'open_clip'",
+            "You may have tried to load a clip model even though model_properties['type'] is set to 'open_clip'",
             str(context.exception))
 
     def test_incomplete_download_open_clip_no_mock(self):

From aaa98784bcbae0bbffb41c45a1e9e81e5e509ea0 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Thu, 29 Aug 2024 10:12:11 +1000
Subject: [PATCH 08/63] Add mobile CLIP model

---
 Dockerfile                               |  2 +-
 src/marqo/s2_inference/model_registry.py | 14 ++++++++++++++
 tests/s2_inference/test_encoding.py      | 15 +++++++++------
 3 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 35ed9bc98..798313fa8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -21,7 +21,7 @@ ARG COMMITHASH
 WORKDIR /app
 
 COPY requirements.txt requirements.txt
-RUN pip3 install --no-cache-dir --upgrade -r requirements.txt
+RUN pip3 install --no-cache-dir -r requirements.txt
 RUN rm requirements.txt
 
 # Stage 3: Final stage that builds on the base image
diff --git a/src/marqo/s2_inference/model_registry.py b/src/marqo/s2_inference/model_registry.py
index 99dd68c5b..4b715e660 100644
--- a/src/marqo/s2_inference/model_registry.py
+++ b/src/marqo/s2_inference/model_registry.py
@@ -487,6 +487,20 @@ def _get_open_clip_properties() -> Dict:
         "dimensions": 512,
         "note": "Marqo's fashionSigLIP model",
         "type": "open_clip"
+    },
+    "open_clip/MobileCLIP-B/datacompdr_lt": {
+        "name": "open_clip/MobileCLIP-B/datacompdr_lt",
+
"dimensions": 512, + "note": "MobileCLIP model", + "type": "open_clip", + "pretrained": "datacompdr_lt" + }, + "open_clip/MobileCLIP-S1/datacompdr": { + "name": "open_clip/MobileCLIP-S1/datacompdr", + "dimensions": 512, + "note": "MobileCLIP model", + "type": "open_clip", + "pretrained": "datacompdr" } } return OPEN_CLIP_MODEL_PROPERTIES diff --git a/tests/s2_inference/test_encoding.py b/tests/s2_inference/test_encoding.py index 8024c2ade..701a39b2e 100644 --- a/tests/s2_inference/test_encoding.py +++ b/tests/s2_inference/test_encoding.py @@ -280,12 +280,15 @@ class TestOpenClipModelEncoding(unittest.TestCase): ''' def setUp(self) -> None: - self.open_clip_test_model = ['open_clip/RN50/yfcc15m', 'open_clip/ViT-B-32/laion2b_s34b_b79k', - 'open_clip/ViT-B-16/laion2b_s34b_b88k', 'open_clip/convnext_base/laion400m_s13b_b51k', - 'open_clip/convnext_base_w/laion_aesthetic_s13b_b82k', - 'open_clip/coca_ViT-B-32/mscoco_finetuned_laion2b_s13b_b90k', - 'open_clip/EVA02-B-16/merged2b_s8b_b131k'] - pass + self.open_clip_test_model = [ + 'open_clip/RN50/yfcc15m', 'open_clip/ViT-B-32/laion2b_s34b_b79k', + 'open_clip/ViT-B-16/laion2b_s34b_b88k', 'open_clip/convnext_base/laion400m_s13b_b51k', + 'open_clip/convnext_base_w/laion_aesthetic_s13b_b82k', + 'open_clip/coca_ViT-B-32/mscoco_finetuned_laion2b_s13b_b90k', + 'open_clip/EVA02-B-16/merged2b_s8b_b131k', + "open_clip/MobileCLIP-B/datacompdr_lt", + "open_clip/MobileCLIP-S1/datacompdr" + ] def tearDown(self) -> None: clear_loaded_models() From 1d3fa813a6e261cead84d893f9c272eca0a14f66 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 2 Sep 2024 16:26:40 +1000 Subject: [PATCH 09/63] Resolve farshid's comments --- .../inference/models/abstract_clip_model.py | 12 ++-- .../core/inference/models/abstract_model.py | 24 ++++++- .../models/{utils.py => image_download.py} | 0 src/marqo/s2_inference/clip_utils.py | 19 ++++-- tests/s2_inference/test_clip_utils.py | 63 +------------------ 5 files changed, 41 insertions(+), 77 deletions(-) rename src/marqo/core/inference/models/{utils.py => image_download.py} (100%) diff --git a/src/marqo/core/inference/models/abstract_clip_model.py b/src/marqo/core/inference/models/abstract_clip_model.py index b60646004..ac71536f0 100644 --- a/src/marqo/core/inference/models/abstract_clip_model.py +++ b/src/marqo/core/inference/models/abstract_clip_model.py @@ -2,16 +2,17 @@ from PIL import UnidentifiedImageError -from marqo.core.inference.models.abstract_model import AbstractModel +from marqo.core.inference.models.abstract_model import AbstractEmbeddingModel from marqo.s2_inference.types import * -from marqo.core.inference.models.utils import _is_image, format_and_load_CLIP_images, format_and_load_CLIP_image +from marqo.core.inference.models.image_download import (_is_image, format_and_load_CLIP_images, + format_and_load_CLIP_image) from marqo.s2_inference.logger import get_logger import torch logger = get_logger(__name__) -class AbstractCLIPModel(AbstractModel): +class AbstractCLIPModel(AbstractEmbeddingModel): """Abstract base class for CLIP models. 
Attributes: @@ -61,11 +62,6 @@ def __init__( self.tokenizer = None self.preprocess = None - @abstractmethod - def load_tokenizer(self): - """Load tokenizer.""" - pass - @abstractmethod def encode_text(self, inputs: Union[str, List[str]], normalize: bool = True) -> FloatTensor: pass diff --git a/src/marqo/core/inference/models/abstract_model.py b/src/marqo/core/inference/models/abstract_model.py index 944402242..34ff7a793 100644 --- a/src/marqo/core/inference/models/abstract_model.py +++ b/src/marqo/core/inference/models/abstract_model.py @@ -1,12 +1,30 @@ from abc import ABC, abstractmethod -class AbstractModel(ABC): +class AbstractEmbeddingModel(ABC): """This is the abstract base class for all models in Marqo.""" - @abstractmethod def load(self): - """Load the model.""" + """Load the model and check if the necessary component are loaded. + + The required components are loaded in the `_load_necessary_components` method. + The loaded components are checked in the `_check_loaded_components` method. + """ + self._load_necessary_components() + self._check_loaded_components() + + @abstractmethod + def _load_necessary_components(self): + """Load the necessary components for the model.""" + pass + + @abstractmethod + def _check_loaded_components(self): + """Check if the necessary components are loaded. + + Raises: + A proper exception if the necessary components are not loaded. + """ pass @abstractmethod diff --git a/src/marqo/core/inference/models/utils.py b/src/marqo/core/inference/models/image_download.py similarity index 100% rename from src/marqo/core/inference/models/utils.py rename to src/marqo/core/inference/models/image_download.py diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 0e5ddef01..14ada3744 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -528,9 +528,9 @@ def __init__( self.pretrained = model_type.split("/", 3)[2] if model_type.startswith("open_clip/") else model_type self.model_properties = OpenCLIPModelProperties(**self.model_properties) - def load(self) -> None: + def _load_necessary_components(self) -> None: """Load the open_clip model and tokenizer.""" - if self.model_properties.url or self.model_properties.model_location: + if self.model_properties.url is not None or self.model_properties.model_location is not None: self.model, self.preprocess = self._load_model_and_image_preprocessor_from_checkpoint() self.tokenizer = self._load_tokenizer_from_checkpoint() elif self.model_properties.name.startswith(HF_HUB_PREFIX): @@ -547,8 +547,19 @@ def load(self) -> None: ) self.model.eval() - def load_tokenizer(self): - pass + def _check_loaded_components(self): + """Check if the open_clip model, tokenizer, and image preprocessor are loaded. + + Raises: + RuntimeError: If the open_clip model, tokenizer, or image preprocessor is not loaded. + """ + if self.model is None: + raise RuntimeError("The open_clip model is not loaded. Please load the model before inference.") + if self.tokenizer is None: + raise RuntimeError("The open_clip tokenizer is not loaded. Please load the tokenizer before inference.") + if self.preprocess is None: + raise RuntimeError("The open_clip image preprocessor is not loaded. 
" + "Please load the image preprocessor before inference.") def _load_image_preprocessor(self) -> Callable: return image_transform_v2(self.preprocess_config) diff --git a/tests/s2_inference/test_clip_utils.py b/tests/s2_inference/test_clip_utils.py index f005a66d8..713740c8f 100644 --- a/tests/s2_inference/test_clip_utils.py +++ b/tests/s2_inference/test_clip_utils.py @@ -209,65 +209,4 @@ def test_multilingual_clip_with_no_device(self): clip = MULTILINGUAL_CLIP(model_properties={'url': model_url}) raise AssertionError except InternalError as e: - pass - -# -# class TestOpenClipLoad(unittest.TestCase): -# -# @patch('marqo.s2_inference.clip_utils.open_clip.create_model_and_transforms', -# return_value=(mock.Mock(), mock.Mock(), mock.Mock())) -# def test_load_without_model_properties(self, mock_open_clip_create_model_and_transforms): -# """By default laion400m_e32 is loaded...""" -# open_clip = OPEN_CLIP(device="cpu") -# open_clip.load() -# mock_open_clip_create_model_and_transforms.assert_called_once_with( -# 'ViT-B-32-quickgelu', pretrained='laion400m_e32', -# device='cpu', jit=False, cache_dir=ModelCache.clip_cache_path) -# -# @patch('marqo.s2_inference.clip_utils.open_clip.create_model_and_transforms', -# return_value=(mock.Mock(), mock.Mock(), mock.Mock())) -# @patch('os.path.isfile', return_value=True) -# def test_load_with_local_file(self, mock_isfile, mock_open_clip_create_model_and_transforms): -# model_path = 'localfile.pth' -# open_clip = OPEN_CLIP(model_properties={'localpath': model_path}, device="cpu") -# open_clip.load() -# mock_open_clip_create_model_and_transforms.assert_called_once_with( -# model_name=open_clip.model_name, jit=False, pretrained=model_path, -# precision='fp32', image_mean=None, image_std=None, -# device='cpu', cache_dir=ModelCache.clip_cache_path) -# -# @patch('marqo.s2_inference.clip_utils.open_clip.create_model_and_transforms', -# return_value=(mock.Mock(), mock.Mock(), mock.Mock())) -# @patch('validators.url', return_value=True) -# @patch('marqo.s2_inference.clip_utils.download_model', return_value='model.pth') -# def test_load_with_url(self, mock_download_model, mock_validators_url, mock_open_clip_create_model_and_transforms): -# model_url = 'http://model.com/model.pth' -# open_clip = OPEN_CLIP(model_properties={'url': model_url}, device="cpu") -# open_clip.load() -# mock_download_model.assert_called_once_with(url=model_url) -# mock_open_clip_create_model_and_transforms.assert_called_once_with( -# model_name=open_clip.model_name, jit=False, pretrained='model.pth', precision='fp32', -# image_mean=None, image_std=None, device='cpu', cache_dir=ModelCache.clip_cache_path) -# -# @patch('marqo.s2_inference.clip_utils.open_clip.create_model_and_transforms', -# return_value=(mock.Mock(), mock.Mock(), mock.Mock())) -# @patch('marqo.s2_inference.clip_utils.CLIP._download_from_repo', -# return_value='model.pth') -# def test_load_with_model_location(self, mock_download_from_repo, mock_open_clip_create_model_and_transforms): -# open_clip = OPEN_CLIP(model_properties={ -# ModelProperties.model_location: ModelLocation( -# auth_required=True, hf=HfModelLocation(repo_id='someId', filename='some_file.pt')).dict()}, device="cpu") -# open_clip.load() -# mock_download_from_repo.assert_called_once() -# mock_open_clip_create_model_and_transforms.assert_called_once_with( -# model_name=open_clip.model_name, jit=False, pretrained='model.pth', precision='fp32', -# image_mean=None, image_std=None, device='cpu', cache_dir=ModelCache.clip_cache_path) -# -# def 
test_open_clip_with_no_device(self): -# # Should fail, raising internal error -# try: -# model_url = 'http://example.com/model.pth' -# clip = OPEN_CLIP(model_properties={'url': model_url}) -# raise AssertionError -# except InternalError as e: -# pass + pass \ No newline at end of file From 2e8e6165b9ce32503d965276674cee1c964b618c Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 2 Sep 2024 16:54:35 +1000 Subject: [PATCH 10/63] Fix tests --- src/marqo/s2_inference/clip_utils.py | 2 +- .../test_open_clip_model_load.py | 34 +++++++++++++++++-- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 20bca3193..54213f06a 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -705,7 +705,7 @@ def _load_tokenizer_from_checkpoint(self) -> Callable: return open_clip.get_tokenizer(self.model_properties.name) else: logger.info(f"Custom HFTokenizer is provided. Loading...") - return HFTokenizer(self.model_properties.tokenizer, ) + return HFTokenizer(self.model_properties.tokenizer) def _load_tokenizer_from_hf_repo(self) -> Callable: return open_clip.get_tokenizer(self.model_properties.name) diff --git a/tests/s2_inference/open_clip_models/test_open_clip_model_load.py b/tests/s2_inference/open_clip_models/test_open_clip_model_load.py index 8c88278da..f7c0f0fef 100644 --- a/tests/s2_inference/open_clip_models/test_open_clip_model_load.py +++ b/tests/s2_inference/open_clip_models/test_open_clip_model_load.py @@ -28,9 +28,9 @@ def test_load_OpenCLIPModelFromCheckPointMethod_success(self): } with patch("marqo.s2_inference.clip_utils.OPEN_CLIP._load_model_and_image_preprocessor_from_checkpoint", \ - return_value=(MagicMock(), None)) as mock_load_method: + return_value=(MagicMock(), MagicMock())) as mock_load_method: with patch("marqo.s2_inference.clip_utils.OPEN_CLIP._load_tokenizer_from_checkpoint", - return_value=None) as mock_load_tokenizer: + return_value=MagicMock()) as mock_load_tokenizer: with patch.object(MagicMock(), 'eval', return_value=None) as mock_eval: model = OPEN_CLIP(model_name, model_properties=model_properties, device="cpu") model.load() @@ -147,3 +147,33 @@ def test_open_clip_load_fromMarqoModelRegistry_success(self): cache_dir=ModelCache.clip_cache_path ) mock_tokenizer.assert_called_once_with("ViT-B-32") + + def test_load_OpenCLIPModel_missing_model_properties(self): + """Test loading an OpenCLIP model with missing model properties should raise an error.""" + model_tag = "my_test_model" + model_properties = { + "type": "open_clip" + # Missing 'name' and 'url' + } + + with self.assertRaises(ValueError) as context: + model = OPEN_CLIP(model_tag, model_properties=model_properties, device="cpu") + model.load() + + self.assertIn("validation error", str(context.exception)) + self.assertIn("name", str(context.exception)) + + def test_load_OpenCLIPModel_unsupported_image_preprocessor(self): + """Test loading an OpenCLIP model with an unsupported image preprocessor should raise an error.""" + model_tag = "my_test_model" + model_properties = { + "name": "ViT-B-32", + "type": "open_clip", + "image_preprocessor": "UnsupportedPreprocessor" + } + + with self.assertRaises(ValueError) as context: + model = OPEN_CLIP(model_tag, model_properties=model_properties, device="cpu") + model.load() + + self.assertIn("permitted: 'SigLIP', 'OpenAI', 'OpenCLIP', 'MobileCLIP', 'CLIPA'", str(context.exception)) \ No newline at end of file From e51405e99d2da27fd04bc5bb22a4762dab798b7d Mon 
Sep 17 00:00:00 2001
From: Li Wan
Date: Mon, 2 Sep 2024 17:53:57 +1000
Subject: [PATCH 11/63] Update version to 2.12.0

---
 src/marqo/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/marqo/version.py b/src/marqo/version.py
index 55b311ced..0daf82c48 100644
--- a/src/marqo/version.py
+++ b/src/marqo/version.py
@@ -1,4 +1,4 @@
-__version__ = "2.11.3"
+__version__ = "2.12.0"
 
 def get_version() -> str:
     return f"{__version__}"

From 4f8f4865d7b382bdde92358e24970d50cbdd5ee1 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Tue, 3 Sep 2024 09:30:53 +1000
Subject: [PATCH 12/63] Change base version to 29

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 798313fa8..078d9ba90 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,7 +6,7 @@ COPY vespa .
 RUN mvn clean package
 
 # Stage 2: Base image for Python setup
-FROM marqoai/marqo-base:20 as base_image
+FROM marqoai/marqo-base:29 as base_image
 
 # Allow mounting volume containing data and configs for vespa
 VOLUME /opt/vespa/var

From 9e7116de7f20a49487f5f5949fe61fce575b6826 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Tue, 3 Sep 2024 14:49:19 +1000
Subject: [PATCH 13/63] Fix examples

---
 src/marqo/s2_inference/model_registry.py            |  2 +-
 .../models/external_apis/abstract_classes.py        | 11 +++--------
 src/marqo/tensor_search/models/external_apis/hf.py  |  6 +++++-
 src/marqo/tensor_search/models/private_models.py    |  8 +++++---
 4 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/src/marqo/s2_inference/model_registry.py b/src/marqo/s2_inference/model_registry.py
index 4b715e660..45147d49c 100644
--- a/src/marqo/s2_inference/model_registry.py
+++ b/src/marqo/s2_inference/model_registry.py
@@ -484,7 +484,7 @@ def _get_open_clip_properties() -> Dict:
     },
     "open_clip/ViT-B-16-SigLIP/marqo-fashionSigLIP": {
         "name": "hf-hub:Marqo/marqo-fashionSigLIP",
-        "dimensions": 512,
+        "dimensions": 768,
         "note": "Marqo's fashionSigLIP model",
         "type": "open_clip"
     },
diff --git a/src/marqo/tensor_search/models/external_apis/abstract_classes.py b/src/marqo/tensor_search/models/external_apis/abstract_classes.py
index 0e4217942..e2b0d3dd4 100644
--- a/src/marqo/tensor_search/models/external_apis/abstract_classes.py
+++ b/src/marqo/tensor_search/models/external_apis/abstract_classes.py
@@ -1,18 +1,13 @@
 """
 These are abstract classes that shouldn't be instantiated
 """
-from pydantic import BaseModel
+from marqo.base_model import ImmutableBaseModel
 
 
-class ExternalAuth(BaseModel):
+class ExternalAuth(ImmutableBaseModel):
     """Authentication used to download an object
     """
-    class Config:
-        allow_mutation = False
 
 
-class ObjectLocation(BaseModel):
+class ObjectLocation(ImmutableBaseModel):
     """Reference to an object location (for example a pointer to a model file in s3
     """
-    class Config:
-        allow_mutation = False
-
diff --git a/src/marqo/tensor_search/models/external_apis/hf.py b/src/marqo/tensor_search/models/external_apis/hf.py
index 87a5d821f..6c698624e 100644
--- a/src/marqo/tensor_search/models/external_apis/hf.py
+++ b/src/marqo/tensor_search/models/external_apis/hf.py
@@ -1,10 +1,14 @@
 from pydantic.dataclasses import dataclass
 from typing import Optional
+
+from pymongo.common import alias
+
 from marqo.tensor_search.models.external_apis.abstract_classes import (
     ObjectLocation, ExternalAuth
 )
 from pydantic import BaseModel, Field, validator
 from marqo.api.exceptions import InvalidArgError
+from pydantic import Field
 
 
 class HfAuth(ExternalAuth):
 
 class
HfModelLocation(ObjectLocation): - repo_id: str = Field(..., description="ID of the repository") + repo_id: str = Field(..., description="ID of the repository", alias="repoId") filename: Optional[str] = Field(None, description="Name of the file") \ No newline at end of file diff --git a/src/marqo/tensor_search/models/private_models.py b/src/marqo/tensor_search/models/private_models.py index 2732e7bf8..32927df44 100644 --- a/src/marqo/tensor_search/models/private_models.py +++ b/src/marqo/tensor_search/models/private_models.py @@ -7,8 +7,10 @@ from pydantic import BaseModel, validator from marqo.api.exceptions import InvalidArgError from typing import Optional +from pydantic import Field +from marqo.base_model import ImmutableBaseModel -class ModelAuth(BaseModel): +class ModelAuth(ImmutableBaseModel): """TODO: insert links to docs in error message""" class Config: allow_mutation = False @@ -33,14 +35,14 @@ def _ensure_exactly_one_auth_method(cls, v, values, field): return v -class ModelLocation(BaseModel): +class ModelLocation(ImmutableBaseModel): class Config: allow_mutation = False s3: Optional[S3Location] = None hf: Optional[HfModelLocation] = None - auth_required: bool = False + auth_required: bool = Field(default=False, alias="authRequired") @validator('s3', 'hf', pre=True, always=True) def _ensure_exactly_one_location(cls, v, values, field): From 9a6089fe1a22e0eaadbf103670bd4a36485a7f42 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Tue, 3 Sep 2024 15:14:29 +1000 Subject: [PATCH 14/63] Fix tests --- src/marqo/s2_inference/model_registry.py | 4 ++-- src/marqo/tensor_search/models/external_apis/hf.py | 6 +----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/marqo/s2_inference/model_registry.py b/src/marqo/s2_inference/model_registry.py index 45147d49c..15ae94e43 100644 --- a/src/marqo/s2_inference/model_registry.py +++ b/src/marqo/s2_inference/model_registry.py @@ -476,13 +476,13 @@ def _get_open_clip_properties() -> Dict: "type": "open_clip", "pretrained": "datacomp_s34b_b86k" }, - "open_clip/ViT-B-16/marqo-fashionCLIP": { + "Marqo/marqo-fashionCLIP": { "name": "hf-hub:Marqo/marqo-fashionCLIP", "dimensions": 512, "note": "Marqo's fashionCLIP model", "type": "open_clip" }, - "open_clip/ViT-B-16-SigLIP/marqo-fashionSigLIP": { + "Marqo/marqo-fashionSigLIP": { "name": "hf-hub:Marqo/marqo-fashionSigLIP", "dimensions": 768, "note": "Marqo's fashionSigLIP model", diff --git a/src/marqo/tensor_search/models/external_apis/hf.py b/src/marqo/tensor_search/models/external_apis/hf.py index 6c698624e..f00ee0225 100644 --- a/src/marqo/tensor_search/models/external_apis/hf.py +++ b/src/marqo/tensor_search/models/external_apis/hf.py @@ -1,14 +1,10 @@ -from pydantic.dataclasses import dataclass from typing import Optional -from pymongo.common import alias +from pydantic import Field from marqo.tensor_search.models.external_apis.abstract_classes import ( ObjectLocation, ExternalAuth ) -from pydantic import BaseModel, Field, validator -from marqo.api.exceptions import InvalidArgError -from pydantic import Field class HfAuth(ExternalAuth): From 9a5922c651ff38930f8cee4bbef33796d0d9f34b Mon Sep 17 00:00:00 2001 From: Li Wan Date: Tue, 3 Sep 2024 15:30:10 +1000 Subject: [PATCH 15/63] Fix tests --- .../s2_inference/open_clip_models/test_marqo_fashion_clip.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/s2_inference/open_clip_models/test_marqo_fashion_clip.py b/tests/s2_inference/open_clip_models/test_marqo_fashion_clip.py index 3a366f8ad..90abbbb46 100644 --- 
a/tests/s2_inference/open_clip_models/test_marqo_fashion_clip.py +++ b/tests/s2_inference/open_clip_models/test_marqo_fashion_clip.py @@ -565,7 +565,7 @@ def tearDown(self): def test_MarqoFashionSigLIPModel_load(self): """Test the load method of MarqoFashionCLIPModel.""" - model_tag = "open_clip/ViT-B-16-SigLIP/marqo-fashionSigLIP" + model_tag = "Marqo/marqo-fashionSigLIP" model_properties = OPEN_CLIP_MODEL_PROPERTIES[model_tag] model = OPEN_CLIP(model_tag, device="cpu", model_properties=model_properties) @@ -591,7 +591,7 @@ def test_MarqoFashionSigLIPModel_load(self): def test_MarqoFashionCLIPModel_load(self): """Test the load method of MarqoFashionCLIPModel.""" - model_tag = "open_clip/ViT-B-16/marqo-fashionCLIP" + model_tag = "Marqo/marqo-fashionCLIP" model_properties = OPEN_CLIP_MODEL_PROPERTIES[model_tag] model = OPEN_CLIP(model_tag, device="cpu", model_properties=model_properties) From c21775ff535d0ccf7f3a526563ed32d3bd22ed54 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 4 Sep 2024 11:44:25 +1000 Subject: [PATCH 16/63] Add some new multilingual clip models --- src/marqo/s2_inference/model_registry.py | 24 +++++++++++++++++++ .../s2_inference/test_large_model_encoding.py | 6 ++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/marqo/s2_inference/model_registry.py b/src/marqo/s2_inference/model_registry.py index 15ae94e43..011157419 100644 --- a/src/marqo/s2_inference/model_registry.py +++ b/src/marqo/s2_inference/model_registry.py @@ -501,6 +501,30 @@ def _get_open_clip_properties() -> Dict: "note": "MobileCLIP model", "type": "open_clip", "pretrained": "datacompdr" + }, + "visheratin/nllb-clip-base-siglip": { + "name": "hf-hub:visheratin/nllb-clip-base-siglip", + "dimensions": 768, + "note": "A multilingual CLIP model", + "type": "open_clip" + }, + "visheratin/nllb-siglip-mrl-base": { + "name": "hf-hub:visheratin/nllb-siglip-mrl-base", + "dimensions": 768, + "note": "A multilingual CLIP model", + "type": "open_clip" + }, + "visheratin/nllb-clip-large-siglip": { + "name": "hf-hub:visheratin/nllb-clip-large-siglip", + "dimensions": 1152, + "note": "A multilingual CLIP model", + "type": "open_clip" + }, + "visheratin/nllb-siglip-mrl-large": { + "name": "hf-hub:visheratin/nllb-siglip-mrl-large", + "dimensions": 1152, + "note": "A multilingual CLIP model", + "type": "open_clip" } } return OPEN_CLIP_MODEL_PROPERTIES diff --git a/tests/s2_inference/test_large_model_encoding.py b/tests/s2_inference/test_large_model_encoding.py index 358562820..6504b462e 100644 --- a/tests/s2_inference/test_large_model_encoding.py +++ b/tests/s2_inference/test_large_model_encoding.py @@ -132,7 +132,11 @@ def setUp(self): 'open_clip/convnext_large_d/laion2b_s26b_b102k_augreg', 'open_clip/xlm-roberta-base-ViT-B-32/laion5b_s13b_b90k', 'open_clip/ViT-H-14-378-quickgelu/dfn5b', - 'open_clip/ViT-SO400M-14-SigLIP-384/webli' + 'open_clip/ViT-SO400M-14-SigLIP-384/webli', + "visheratin/nllb-siglip-mrl-large", + "visheratin/nllb-clip-large-siglip", + "visheratin/nllb-siglip-mrl-base", + "visheratin/nllb-clip-base-siglip" ] def tearDown(self): From 39c1557b4d783f68c921068c62811f5018792c9f Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 4 Sep 2024 13:19:47 +1000 Subject: [PATCH 17/63] Add subtests for large clip models --- .../s2_inference/test_large_model_encoding.py | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/tests/s2_inference/test_large_model_encoding.py b/tests/s2_inference/test_large_model_encoding.py index 6504b462e..e7aaf2e2e 100644 --- 
a/tests/s2_inference/test_large_model_encoding.py +++ b/tests/s2_inference/test_large_model_encoding.py @@ -162,27 +162,31 @@ def test_load_clip_text_model(self): texts = ['hello', 'big', 'asasasasaaaaaaaaaaaa', '', 'a word. another one!?. #$#.'] for name in self.models: - model = _load_model(name, model_properties=get_model_properties_from_registry(name), device=device) + with self.subTest(f"Testing model: {name}"): + model = _load_model(name, model_properties=get_model_properties_from_registry(name), device=device) - for text in texts: - assert abs(model.encode(text) - model.encode([text])).sum() < eps - assert abs(model.encode_text(text) - model.encode([text])).sum() < eps - assert abs(model.encode(text) - model.encode_text([text])).sum() < eps + for text in texts: + assert abs(model.encode(text) - model.encode([text])).sum() < eps + assert abs(model.encode_text(text) - model.encode([text])).sum() < eps + assert abs(model.encode(text) - model.encode_text([text])).sum() < eps - del model - clear_loaded_models() + del model + clear_loaded_models() def test_model_outputs(self): for model_name in self.models: - run_test_model_outputs([model_name]) + with self.subTest(f"Testing model: {model_name}"): + run_test_model_outputs([model_name]) def test_model_normalization(self): for model_name in self.models: - run_test_model_normalization([model_name]) + with self.subTest(f"Testing model: {model_name}"): + run_test_model_normalization([model_name]) def test_cuda_encode_type(self): for model_name in self.models: - run_test_cuda_encode_type([model_name]) + with self.subTest(f"Testing model: {model_name}"): + run_test_cuda_encode_type([model_name]) @patch("torch.cuda.amp.autocast") def test_autocast_called_in_open_clip(self, mock_autocast): @@ -190,10 +194,11 @@ def test_autocast_called_in_open_clip(self, mock_autocast): contents = ['this is a test sentence. 
so is this.', "https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image0.jpg"] for model_name in names: - for content in contents: - vectorise(model_name=model_name, content=content, device="cuda") - mock_autocast.assert_called_once() - mock_autocast.reset_mock() + with self.subTest(f"Testing model: {model_name}"): + for content in contents: + vectorise(model_name=model_name, content=content, device="cuda") + mock_autocast.assert_called_once() + mock_autocast.reset_mock() From a124c0c42ad989b93f88dae26db7ddfb77746719 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 4 Sep 2024 14:32:54 +1000 Subject: [PATCH 18/63] Update file name --- src/marqo/core/inference/models/abstract_clip_model.py | 2 +- .../models/{abstract_model.py => abstract_embedding_model.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/marqo/core/inference/models/{abstract_model.py => abstract_embedding_model.py} (100%) diff --git a/src/marqo/core/inference/models/abstract_clip_model.py b/src/marqo/core/inference/models/abstract_clip_model.py index ac71536f0..8587804e3 100644 --- a/src/marqo/core/inference/models/abstract_clip_model.py +++ b/src/marqo/core/inference/models/abstract_clip_model.py @@ -2,7 +2,7 @@ from PIL import UnidentifiedImageError -from marqo.core.inference.models.abstract_model import AbstractEmbeddingModel +from marqo.core.inference.models.abstract_embedding_model import AbstractEmbeddingModel from marqo.s2_inference.types import * from marqo.core.inference.models.image_download import (_is_image, format_and_load_CLIP_images, format_and_load_CLIP_image) diff --git a/src/marqo/core/inference/models/abstract_model.py b/src/marqo/core/inference/models/abstract_embedding_model.py similarity index 100% rename from src/marqo/core/inference/models/abstract_model.py rename to src/marqo/core/inference/models/abstract_embedding_model.py From dd61ac78eaf724bd56e58be8e910b8567896ffa6 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 4 Sep 2024 14:48:46 +1000 Subject: [PATCH 19/63] Finish HF class --- .../inference/models/abstract_hf_model.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 src/marqo/core/inference/models/abstract_hf_model.py diff --git a/src/marqo/core/inference/models/abstract_hf_model.py b/src/marqo/core/inference/models/abstract_hf_model.py new file mode 100644 index 000000000..8261c3ecd --- /dev/null +++ b/src/marqo/core/inference/models/abstract_hf_model.py @@ -0,0 +1,26 @@ +from marqo.core.inference.models.abstract_embedding_model import AbstractEmbeddingModel + + +class HuggingFaceModel(AbstractEmbeddingModel): + """The concrete class for all sentence transformers models loaded from Hugging Face. 
+ + + + """ + + def __init__(self, model_name: str, ): + + + + + def _check_loaded_components(self): + + pass + + + + def _load_necessary_components(self): + if self.tokenizer is None: + self.tokenizer = self._load_tokenizer() + pass + From badcd44ca787fd24f0216a28736d6f4a9261f023 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 4 Sep 2024 15:08:23 +1000 Subject: [PATCH 20/63] Add max_seq_length back --- src/marqo/s2_inference/clip_utils.py | 3 ++- src/marqo/s2_inference/s2_inference.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 54213f06a..84871eb50 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -530,7 +530,8 @@ def __init__( embedding_dim: Optional[int] = None, truncate: bool = True, model_properties: Optional[Dict] = None, - model_auth: Optional[Dict] = None + model_auth: Optional[Dict] = None, + **kwargs ) -> None: super().__init__(model_type, device, embedding_dim, truncate, model_properties) diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index 55b0700cd..18b98a47c 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -510,7 +510,8 @@ def _load_model( device=device, embedding_dim=model_properties['dimensions'], model_properties=model_properties, - model_auth=model_auth + model_auth=model_auth, + max_seq_length=model_properties.get('tokens', get_default_seq_length()) ) model.load() return model From 0123c82eb97e64d1aef18c12939690530ae6dab0 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 4 Sep 2024 16:00:02 +1000 Subject: [PATCH 21/63] Update open clip code --- .../inference/models/abstract_clip_model.py | 35 ++++--------------- .../models/abstract_embedding_model.py | 27 +++++++++++++- src/marqo/s2_inference/clip_utils.py | 33 ++++++++--------- .../test_marqo_fashion_clip.py | 4 +-- .../test_open_clip_model_load.py | 16 ++++----- 5 files changed, 58 insertions(+), 57 deletions(-) diff --git a/src/marqo/core/inference/models/abstract_clip_model.py b/src/marqo/core/inference/models/abstract_clip_model.py index 8587804e3..f03e8efb6 100644 --- a/src/marqo/core/inference/models/abstract_clip_model.py +++ b/src/marqo/core/inference/models/abstract_clip_model.py @@ -16,11 +16,7 @@ class AbstractCLIPModel(AbstractEmbeddingModel): """Abstract base class for CLIP models. Attributes: - model_tag (str): The tag of the model. It is used to identify the model in the model registry. device (str): The device to load the model on, typically 'cpu' or 'cuda'. - embedding_dim (int, optional): The dimensionality of the model's embeddings. If not provided, - it should be inferred from the model. - truncate (bool): Indicates whether the text should be truncated to a smaller size in the tokenizer. model_properties (dict): A dictionary containing additional properties or configurations specific to the model. Defaults to an empty dictionary if not provided. model: The actual CLIP model instance, initialized to `None` and to be set by subclasses. @@ -28,35 +24,18 @@ class AbstractCLIPModel(AbstractEmbeddingModel): preprocess: The preprocessing pipeline for the model, initialized to `None` and to be set by subclasses. 
""" - def __init__( - self, model_type: str, - device: Optional[str] = None, - embedding_dim: Optional[int] = None, - truncate: bool = True, - model_properties: Optional[dict] = None, - **kwargs - ): + def __init__(self, device: Optional[str] = None, model_properties: Optional[dict] = None, + model_auth: Optional[dict] = None): """Instantiate the abstract CLIP model. Args: - model_type (str): The type of the model. - device (str): The device to load the model on. - embedding_dim (int): The embedding dimension of the model. - truncate (bool): Whether to truncate the model. - model_properties (dict): The properties of the model. + device (str): The device to load the model on, typically 'cpu' or 'cuda'. + model_properties (dict): A dictionary containing additional properties or configurations + specific to the model. Defaults to an empty dictionary if not provided. + model_auth (dict): The authentication information for the model. Defaults to `None` if not provided """ - self.model_tag = model_type - if not device: - raise ValueError("`device` is required for loading CLIP models!") - self.device = device - - self.embedding_dim = embedding_dim - self.truncate = truncate - - self.model_properties = model_properties - if self.model_properties is None: - self.model_properties = dict() + super().__init__(model_properties, device, model_auth) self.model = None self.tokenizer = None diff --git a/src/marqo/core/inference/models/abstract_embedding_model.py b/src/marqo/core/inference/models/abstract_embedding_model.py index 34ff7a793..85d7050ea 100644 --- a/src/marqo/core/inference/models/abstract_embedding_model.py +++ b/src/marqo/core/inference/models/abstract_embedding_model.py @@ -1,9 +1,29 @@ from abc import ABC, abstractmethod +from typing import Optional class AbstractEmbeddingModel(ABC): """This is the abstract base class for all models in Marqo.""" + def __init__(self, model_properties: Optional[dict] = None, device: Optional[str] = None, + model_auth: Optional[dict] = None): + """Load the model with the given properties. + + Args: + model_properties (dict): The properties of the model. + device (str): The device to load the model on. + model_auth (dict): The authentication information for the model. + """ + if device is None: + raise ValueError("`device` is required for loading CLIP models!") + + if model_properties is None: + model_properties = dict() + + self.model_properties = self._build_model_properties(model_properties) + self.device = device + self.model_auth = model_auth + def load(self): """Load the model and check if the necessary component are loaded. 
@@ -13,6 +33,11 @@ def load(self): self._load_necessary_components() self._check_loaded_components() + @abstractmethod + def _build_model_properties(self, model_properties: dict): + """Parse the model properties from the user input and convert it to a pydantic model.""" + pass + @abstractmethod def _load_necessary_components(self): """Load the necessary components for the model.""" @@ -29,4 +54,4 @@ def _check_loaded_components(self): @abstractmethod def encode(self): - pass \ No newline at end of file + pass diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 84871eb50..3f63539c2 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -1,5 +1,6 @@ import os from io import BytesIO +from platform import architecture import certifi import clip @@ -409,7 +410,7 @@ def encode_text(self, sentence: Union[str, List[str]], normalize=True) -> FloatT if self.model is None: self.load() - text = self.tokenizer(sentence, truncate=self.truncate).to(self.device) + text = self.tokenizer(sentence, truncate=True).to(self.device) with torch.no_grad(): outputs = self.model.encode_text(text) @@ -525,24 +526,20 @@ def load(self) -> None: class OPEN_CLIP(AbstractCLIPModel): def __init__( self, - model_type: str = "open_clip/ViT-B-32-quickgelu/laion400m_e32", device: Optional[str] = None, - embedding_dim: Optional[int] = None, - truncate: bool = True, model_properties: Optional[Dict] = None, model_auth: Optional[Dict] = None, - **kwargs ) -> None: - super().__init__(model_type, device, embedding_dim, truncate, model_properties) + super().__init__(device, model_properties, model_auth) # model_auth gets passed through add_docs and search requests: - self.model_auth = model_auth self.preprocess_config = None - self.model_name = model_type.split("/", 3)[1] if model_type.startswith("open_clip/") else model_type - self.pretrained = model_type.split("/", 3)[2] if model_type.startswith("open_clip/") else model_type self.model_properties = OpenCLIPModelProperties(**self.model_properties) + def _build_model_properties(self, model_properties: dict): + return OpenCLIPModelProperties(**model_properties) + def _load_necessary_components(self) -> None: """Load the open_clip model and tokenizer.""" if self.model_properties.url is not None or self.model_properties.model_location is not None: @@ -551,7 +548,7 @@ def _load_necessary_components(self) -> None: elif self.model_properties.name.startswith(HF_HUB_PREFIX): self.model, self.preprocess = self._load_model_and_image_preprocessor_from_hf_repo() self.tokenizer = self._load_tokenizer_from_hf_repo() - elif self.model_tag.startswith(MARQO_OPEN_CLIP_REGISTRY_PREFIX): + elif self.model_properties.name.startswith(MARQO_OPEN_CLIP_REGISTRY_PREFIX): self.model, self.preprocess = self._load_model_and_image_preprocessor_from_open_clip_repo() self.tokenizer = self._load_tokenizer_from_open_clip_repo() else: @@ -619,7 +616,7 @@ def _load_model_and_image_preprocessor_from_checkpoint(self) -> Tuple[torch.nn.M raise ValueError("The 'url' or 'model_location' is required in 'model_properties' " "when loading a custom open_clip model through a URL or a model_location object") - logger.info(f"The name of the custom clip model is {self.model_name}. We use open_clip loader") + logger.info(f"The name of the custom clip model is {self.model_properties.name}. 
We use open_clip loader") try: self.preprocess_config = self._aggregate_image_preprocessor_config() @@ -657,7 +654,7 @@ def _load_model_and_image_preprocessor_from_checkpoint(self) -> Tuple[torch.nn.M elif isinstance(e, (AttributeError, RuntimeError)) or ( "This could be because the operator doesn't exist for this backend" in str(e)): raise InvalidModelPropertiesError( - f"Marqo encountered an error when loading custom open_clip model '{self.model_name}' with " + f"Marqo encountered an error when loading custom open_clip model '{self.model_properties.name}' with " f"model properties = '{self.model_properties.dict()}'. " f"The error message is {str(e)}. " f"You may have tried to load a clip model even though model_properties['type'] is set to 'open_clip' " @@ -666,7 +663,7 @@ def _load_model_and_image_preprocessor_from_checkpoint(self) -> Tuple[torch.nn.M ) else: raise RuntimeError( - f"Marqo encountered an error when loading custom open_clip model {self.model_name} with " + f"Marqo encountered an error when loading custom open_clip model {self.model_properties.name} with " f"model properties = {self.model_properties.dict()}. " f"The error message is {str(e)}. " f"Please check and update your model properties and retry. " @@ -690,12 +687,12 @@ def _load_model_and_image_preprocessor_from_open_clip_repo(self) -> Tuple[torch. The model name should be provided in the model properties, and it is a string starting with `open_clip/`. """ - self.model_name = self.model_tag.split("/", 3)[1] - self.pretrained = self.model_tag.split("/", 3)[2] + architecture = self.model_properties.name.split("/", 3)[1] + pretrained = self.model_properties.name.split("/", 3)[2] model, _, preprocess = open_clip.create_model_and_transforms( - model_name=self.model_name, - pretrained=self.pretrained, + model_name=architecture, + pretrained=pretrained, device=self.device, cache_dir=ModelCache.clip_cache_path ) @@ -712,7 +709,7 @@ def _load_tokenizer_from_hf_repo(self) -> Callable: return open_clip.get_tokenizer(self.model_properties.name) def _load_tokenizer_from_open_clip_repo(self) -> Callable: - return open_clip.get_tokenizer(self.model_name) + return open_clip.get_tokenizer(self.model_properties.name.split("/", 3)[1]) def _download_from_repo(self): """Downloads model from an external repo like s3 and returns the filepath diff --git a/tests/s2_inference/open_clip_models/test_marqo_fashion_clip.py b/tests/s2_inference/open_clip_models/test_marqo_fashion_clip.py index 90abbbb46..ead7e245b 100644 --- a/tests/s2_inference/open_clip_models/test_marqo_fashion_clip.py +++ b/tests/s2_inference/open_clip_models/test_marqo_fashion_clip.py @@ -567,7 +567,7 @@ def test_MarqoFashionSigLIPModel_load(self): model_tag = "Marqo/marqo-fashionSigLIP" model_properties = OPEN_CLIP_MODEL_PROPERTIES[model_tag] - model = OPEN_CLIP(model_tag, device="cpu", model_properties=model_properties) + model = OPEN_CLIP(device="cpu", model_properties=model_properties) model.load() @@ -593,7 +593,7 @@ def test_MarqoFashionCLIPModel_load(self): model_tag = "Marqo/marqo-fashionCLIP" model_properties = OPEN_CLIP_MODEL_PROPERTIES[model_tag] - model = OPEN_CLIP(model_tag, device="cpu", model_properties=model_properties) + model = OPEN_CLIP(device="cpu", model_properties=model_properties) model.load() diff --git a/tests/s2_inference/open_clip_models/test_open_clip_model_load.py b/tests/s2_inference/open_clip_models/test_open_clip_model_load.py index f7c0f0fef..c75d1c8c8 100644 --- a/tests/s2_inference/open_clip_models/test_open_clip_model_load.py +++ 
b/tests/s2_inference/open_clip_models/test_open_clip_model_load.py @@ -32,7 +32,7 @@ def test_load_OpenCLIPModelFromCheckPointMethod_success(self): with patch("marqo.s2_inference.clip_utils.OPEN_CLIP._load_tokenizer_from_checkpoint", return_value=MagicMock()) as mock_load_tokenizer: with patch.object(MagicMock(), 'eval', return_value=None) as mock_eval: - model = OPEN_CLIP(model_name, model_properties=model_properties, device="cpu") + model = OPEN_CLIP(model_properties=model_properties, device="cpu") model.load() mock_load_method.assert_called_once() mock_load_tokenizer.assert_called_once() @@ -51,7 +51,7 @@ def test_load_OpenCLIPModelFromCheckPointParameters_success(self): as mock_tokenizer: with patch("marqo.s2_inference.clip_utils.download_model", return_value="my_test_model.pt"): with patch.object(MagicMock(), 'eval', return_value=None) as mock_eval: - model = OPEN_CLIP(model_tag, model_properties=model_properties, device="cpu") + model = OPEN_CLIP(model_properties=model_properties, device="cpu") model.load() mock_create_model.assert_called_once_with( model_name="ViT-B-108", @@ -86,7 +86,7 @@ def test_load_OpenCLIPModelFromCheckPointPreprocessConfig(self): as mock_tokenizer: with patch("marqo.s2_inference.clip_utils.download_model", return_value="my_test_model.pt"): with patch.object(MagicMock(), 'eval', return_value=None) as mock_eval: - model = OPEN_CLIP(model_tag, model_properties=model_properties, device="cpu") + model = OPEN_CLIP(model_properties=model_properties, device="cpu") model.load() mock_create_model.assert_called_once_with( model_name="test-siglip", @@ -117,7 +117,7 @@ def test_open_clip_load_fromHuggingFaceHub_success(self): with patch("marqo.s2_inference.clip_utils.open_clip.get_tokenizer", return_value=MagicMock()) \ as mock_tokenizer: with patch.object(MagicMock(), 'eval', return_value=None) as mock_eval: - model = OPEN_CLIP(model_tag, model_properties=model_properties, device="cpu") + model = OPEN_CLIP(model_properties=model_properties, device="cpu") model.load() mock_create_model.assert_called_once_with( model_name="hf-hub:my_test_hub", @@ -138,7 +138,7 @@ def test_open_clip_load_fromMarqoModelRegistry_success(self): with patch("marqo.s2_inference.clip_utils.open_clip.get_tokenizer", return_value=MagicMock()) \ as mock_tokenizer: with patch.object(MagicMock(), 'eval', return_value=None) as mock_eval: - model = OPEN_CLIP(model_tag, model_properties=model_properties, device="cpu") + model = OPEN_CLIP(model_properties=model_properties, device="cpu") model.load() mock_create_model.assert_called_once_with( model_name="ViT-B-32", @@ -157,7 +157,7 @@ def test_load_OpenCLIPModel_missing_model_properties(self): } with self.assertRaises(ValueError) as context: - model = OPEN_CLIP(model_tag, model_properties=model_properties, device="cpu") + model = OPEN_CLIP(model_properties=model_properties, device="cpu") model.load() self.assertIn("validation error", str(context.exception)) @@ -173,7 +173,7 @@ def test_load_OpenCLIPModel_unsupported_image_preprocessor(self): } with self.assertRaises(ValueError) as context: - model = OPEN_CLIP(model_tag, model_properties=model_properties, device="cpu") + model = OPEN_CLIP(model_properties=model_properties, device="cpu") model.load() - self.assertIn("permitted: 'SigLIP', 'OpenAI', 'OpenCLIP', 'MobileCLIP', 'CLIPA'", str(context.exception)) \ No newline at end of file + self.assertIn("permitted: 'SigLIP', 'OpenAI', 'OpenCLIP', 'MobileCLIP', 'CLIPA'", str(context.exception)) From 4e2b0400c4c4db29d55ee0a65742e4c5b5af58dd Mon Sep 17 00:00:00 2001 
From: Li Wan Date: Wed, 4 Sep 2024 16:22:21 +1000 Subject: [PATCH 22/63] update open clip class --- src/marqo/s2_inference/s2_inference.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index 18b98a47c..87bf977ca 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -503,16 +503,26 @@ def _load_model( f"`unit_test` or `_update_available_models` for threading safeness.") print(f"loading for: model_name={model_name} and properties={model_properties}") + + model_type = model_properties.get("type") loader = _get_model_loader(model_properties.get('name', None), model_properties) - model = loader( - model_properties.get('name', None), - device=device, - embedding_dim=model_properties['dimensions'], - model_properties=model_properties, - model_auth=model_auth, - max_seq_length=model_properties.get('tokens', get_default_seq_length()) - ) + # TODO For each refactored model class, add a new elif block here + if model_type == ModelType.OpenCLIP: + model = loader( + device = device, + model_properties = model_properties, + model_auth = model_auth, + ) + else: + model = loader( + model_properties.get('name', None), + device=device, + embedding_dim=model_properties['dimensions'], + model_properties=model_properties, + model_auth=model_auth, + max_seq_length=model_properties.get('tokens', get_default_seq_length()) + ) model.load() return model From 4d4ea24f7ad72499ab813b1a0a9c6d02a4c0d848 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 4 Sep 2024 16:26:58 +1000 Subject: [PATCH 23/63] Finish open_clip refactoring --- src/marqo/s2_inference/clip_utils.py | 1 - src/marqo/s2_inference/s2_inference.py | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 3f63539c2..199229fb8 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -535,7 +535,6 @@ def __init__( # model_auth gets passed through add_docs and search requests: self.preprocess_config = None - self.model_properties = OpenCLIPModelProperties(**self.model_properties) def _build_model_properties(self, model_properties: dict): return OpenCLIPModelProperties(**model_properties) diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index 87bf977ca..1513323c0 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -507,7 +507,8 @@ def _load_model( model_type = model_properties.get("type") loader = _get_model_loader(model_properties.get('name', None), model_properties) - # TODO For each refactored model class, add a new elif block here + # TODO For each refactored model class, add a new elif block here and remove the if block + # once we have all models refactored if model_type == ModelType.OpenCLIP: model = loader( device = device, From 945c58966a77616c0c07460b1a674db1dcb6d715 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 5 Sep 2024 15:54:46 +1000 Subject: [PATCH 24/63] Finish the implementation. 
Need tests --- src/marqo/core/inference/model_download.py | 140 ++++++++++++ .../inference/models/abstract_hf_model.py | 26 --- .../inference/models/hugging_face_model.py | 205 ++++++++++++++++++ .../models/hugging_face_model_properties.py | 103 +++++++++ src/marqo/s2_inference/model_registry.py | 5 +- .../processing/custom_clip_utils.py | 6 +- src/marqo/s2_inference/s2_inference.py | 2 +- 7 files changed, 455 insertions(+), 32 deletions(-) create mode 100644 src/marqo/core/inference/model_download.py delete mode 100644 src/marqo/core/inference/models/abstract_hf_model.py create mode 100644 src/marqo/core/inference/models/hugging_face_model.py create mode 100644 src/marqo/core/inference/models/hugging_face_model_properties.py diff --git a/src/marqo/core/inference/model_download.py b/src/marqo/core/inference/model_download.py new file mode 100644 index 000000000..a25ec4ae6 --- /dev/null +++ b/src/marqo/core/inference/model_download.py @@ -0,0 +1,140 @@ +import os +import urllib +from typing import Union, Optional +from urllib.error import HTTPError + +from tqdm import tqdm + +from marqo.s2_inference.configs import ModelCache +from marqo.s2_inference.errors import ModelDownloadError, InvalidModelPropertiesError +from marqo.s2_inference.model_downloading.from_hf import download_model_from_hf +from marqo.s2_inference.model_downloading.from_s3 import ( + get_presigned_s3_url, get_s3_model_cache_filename, check_s3_model_already_exists, + get_s3_model_absolute_cache_path +) +from marqo.tensor_search.models.external_apis.s3 import S3Auth, S3Location +from marqo.tensor_search.models.private_models import ModelAuth, ModelLocation + + +def download_model( + repo_location: Optional[ModelLocation] = None, + url: Optional[str] = None, + auth: Optional[ModelAuth] = None, + download_dir: Optional[str] = None + ) -> str: + """ + Download a model from a given location. + + Args: + repo_location: object that contains information about the location of a + model. For example, s3 bucket and object path + url: location of a model specified by a URL + auth: object that contains information about authorisation required to + download a model. For example, s3 access keys + download_dir: The directory where the model should be downloaded. + + Returns: + The path of the downloaded model + """ + single_weight_location_validation_msg = ( + "only exactly one of parameters (repo_location, url) is allowed to be specified.") + if repo_location is None and url is None: + raise InvalidModelPropertiesError(single_weight_location_validation_msg) + if repo_location is not None and url is not None: + raise InvalidModelPropertiesError(single_weight_location_validation_msg) + + if url: + return download_pretrained_from_url(url=url, cache_dir=download_dir) + + if repo_location.s3: + download_kwargs = {'location': repo_location.s3, 'download_dir': download_dir} + if auth is not None: + download_kwargs['auth'] = auth.s3 + return download_pretrained_from_s3(**download_kwargs) + elif repo_location.hf: + download_kwargs = {'location': repo_location.hf, 'download_dir': download_dir} + if auth is not None: + download_kwargs['auth'] = auth.hf + return download_model_from_hf(**download_kwargs) + + +def download_pretrained_from_s3( + location: S3Location, + auth: Optional[S3Auth] = None, + download_dir: Optional[str] = None +) -> str: + """Downloads a pretrained model from S3, if it doesn't exist locally. The basename of the object's + key is used for the filename. 
+ + Args: + location: Bucket and key of model file to be downloaded + auth: AWS IAM access keys to a user with access to the model to be downloaded + download_dir: the location where the model should be stored + + Returns: + Path to the downloaded model + """ + if check_s3_model_already_exists(location=location, download_dir=download_dir): + # TODO: check if abs path is even the most appropriate??? + return get_s3_model_absolute_cache_path(location=location, download_dir=download_dir) + + url = get_presigned_s3_url(location=location, auth=auth) + + try: + return download_pretrained_from_url( + url=url, cache_dir=download_dir, + cache_file_name=get_s3_model_cache_filename(location) + ) + except HTTPError as e: + if e.code == 403: + # TODO: add link to auth docs + raise ModelDownloadError( + "Received 403 error when trying to retrieve model from s3 storage. " + "Please check the request's s3 credentials and try again. " + ) from e + else: + raise e + +def download_pretrained_from_url( + url: str, + cache_dir: Union[str, None] = None, + cache_file_name: Optional[str] = None, +) -> str: + ''' + This function takes a clip model checkpoint url as input, downloads the model if it doesn't exist locally, + and returns the local path of the downloaded file. + + Args: + url: a valid string of the url address. + cache_dir: the directory to store the file + cache_file_name: name of the model file when it gets downloaded to the cache. + If not provided, the basename of the URL is used. + Returns: + download_target: the local path of the downloaded file. + ''' + buffer_size = 8192 + if not cache_dir: + cache_dir = os.path.expanduser(ModelCache.clip_cache_path) + os.makedirs(cache_dir, exist_ok=True) + + if cache_file_name is None: + filename = os.path.basename(url) + else: + filename = cache_file_name + + download_target = os.path.join(cache_dir, filename) + + if os.path.isfile(download_target): + return download_target + + with urllib.request.urlopen(url) as source, open(download_target, "wb") as output: + with tqdm(total=int(source.headers.get("Content-Length")), ncols=80, unit='iB', unit_scale=True) as loop: + while True: + buffer = source.read(buffer_size) + if not buffer: + break + + output.write(buffer) + loop.update(len(buffer)) + + return download_target diff --git a/src/marqo/core/inference/models/abstract_hf_model.py b/src/marqo/core/inference/models/abstract_hf_model.py deleted file mode 100644 index 8261c3ecd..000000000 --- a/src/marqo/core/inference/models/abstract_hf_model.py +++ /dev/null @@ -1,26 +0,0 @@ -from marqo.core.inference.models.abstract_embedding_model import AbstractEmbeddingModel - - -class HuggingFaceModel(AbstractEmbeddingModel): - """The concrete class for all sentence transformers models loaded from Hugging Face. 
-
-
-
-    def __init__(self, model_name: str, ):
-
-
-
-    def _check_loaded_components(self):
-
-        pass
-
-
-
-    def _load_necessary_components(self):
-        if self.tokenizer is None:
-            self.tokenizer = self._load_tokenizer()
-        pass
-
diff --git a/src/marqo/core/inference/models/hugging_face_model.py b/src/marqo/core/inference/models/hugging_face_model.py
new file mode 100644
index 000000000..f5f0fe85e
--- /dev/null
+++ b/src/marqo/core/inference/models/hugging_face_model.py
@@ -0,0 +1,205 @@
+from marqo.core.inference.models.abstract_embedding_model import AbstractEmbeddingModel
+from marqo.core.inference.models.hugging_face_model_properties import HuggingFaceModelProperties, PoolingMethod
+from marqo import marqo_docs
+from typing import Tuple, Callable
+import os, validators
+import zipfile, tarfile
+import numpy as np
+from typing import Optional
+import torch
+from torch import nn
+from transformers import (AutoModel, AutoTokenizer)
+
+from marqo.s2_inference.hf_utils import AutoModelForSentenceEmbedding
+from marqo.tensor_search.models.private_models import ModelLocation, ModelAuth
+from marqo.tensor_search.enums import ModelProperties, InferenceParams
+from marqo.s2_inference.types import Union, FloatTensor, List
+from marqo.s2_inference.logger import get_logger
+from marqo.s2_inference.errors import InvalidModelPropertiesError, ModelDownloadError
+from marqo.core.inference.model_download import download_model
+from marqo.s2_inference.configs import ModelCache
+import torch.nn.functional as F
+from pydantic import ValidationError
+
+
+def _average_pool_func(model_output, attention_mask):
+    last_hidden = model_output.last_hidden_state.masked_fill(~attention_mask[..., None].bool(), 0.0)
+    return last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
+
+
+def _cls_pool_func(model_output, attention_mask=None):
+    return model_output[0][:, 0]
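The two pooling helpers above reduce a (batch, seq_len, hidden) tensor of token states to one vector per input. A standalone sketch of the masked mean (toy tensors, not part of the patch):

    import torch

    last_hidden_state = torch.ones(1, 4, 2)        # 4 tokens, hidden size 2
    attention_mask = torch.tensor([[1, 1, 1, 0]])  # last position is padding

    # zero out padded positions, then average over the real tokens only
    masked = last_hidden_state.masked_fill(~attention_mask[..., None].bool(), 0.0)
    mean = masked.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
    print(mean.shape)  # torch.Size([1, 2]); each value is the mean over 3 tokens
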
+            )
+        self.model = self.model.to(self.device)
+        self.pooling_func = self._load_pooling_method()
+        self.model.eval()
+
+    def _load_from_hugging_face_repo(self) -> Tuple:
+        """Load the model from the Hugging Face model hub based on the repo_id."""
+        model = AutoModel.from_pretrained(self.model_properties.name)
+        tokenizer = AutoTokenizer.from_pretrained(self.model_properties.name)
+        return model, tokenizer
+
+    def _load_from_zip_file(self) -> Tuple:
+        """Load the model from a zip file."""
+        zip_file_path = download_model(
+            repo_location=self.model_properties.model_location,
+            url=self.model_properties.url,
+            auth=self.model_auth,
+            download_dir=ModelCache.hf_cache_path
+        )
+
+        model_dir = extract_huggingface_archive(zip_file_path)
+        try:
+            model = AutoModel.from_pretrained(model_dir).to(self.device)
+            tokenizer = AutoTokenizer.from_pretrained(model_dir)
+        except (OSError, ValueError, RuntimeError) as e:
+            raise InvalidModelPropertiesError(
+                f"Marqo encountered an error loading the Hugging Face model, modelProperties={self.model_properties}. "
+                f"Please ensure that the model is a valid Hugging Face model and retry.\n"
+                f" Original error message = {e}") from e
+        return model, tokenizer
+
+    def _load_pooling_method(self) -> Callable:
+        """Load the pooling method for the model."""
+        if self.model_properties.pooling_method == PoolingMethod.Mean:
+            return _average_pool_func
+        elif self.model_properties.pooling_method == PoolingMethod.CLS:
+            return _cls_pool_func
+        else:
+            raise ValueError(f"Invalid pooling method: {self.model_properties.pooling_method}")
+
+    def encode(self, sentence: Union[str, List[str]], normalize=True, **kwargs) -> Union[FloatTensor, np.ndarray]:
+        if isinstance(sentence, str):
+            sentence = [sentence]
+
+        if self.model is None:
+            self.load()
+
+        self.model.normalize = normalize
+        tokenized_texts = self.tokenizer(
+            sentence,
+            padding=True,
+            truncation=True,
+            max_length=self.model_properties.token,
+            return_tensors="pt"
+        ).to(self.device)
+
+        with torch.no_grad():
+            model_output = self.model(**tokenized_texts)
+
+        attention_mask = tokenized_texts['attention_mask']
+
+        embeddings = self.pooling_func(model_output, attention_mask)
+
+        if normalize:
+            embeddings = F.normalize(embeddings, p=2, dim=1)
+
+        return self._convert_output(embeddings)
+
+    def _convert_output(self, output):
+        if self.device == 'cpu':
+            return output.numpy()
+        elif self.device.startswith('cuda'):
+            return output.cpu().numpy()
+
+def extract_huggingface_archive(path: str) -> str:
+    '''
+
+    This function takes a path as input. The path must be a string that is either:
+    1. A downloaded archive file. This function will extract the model from the archive and return the directory path.
+    2. A repo_id in huggingface. This function will return the input string directly.
+
+    path: the downloaded model archive path or a repo_id in huggingface
+    Returns:
+        The directory path to the model or the repo_id in huggingface
+    '''
+    if os.path.isfile(path):
+        # if it's a file, check if it's a compressed file
+        base, ext = os.path.splitext(path)
+        if ext in ['.bin', '.pt']:
+            raise InvalidModelPropertiesError(f"Marqo does not support loading Hugging Face SBERT models from the provided single `{ext}` file. "
+                                              "Please try to wrap the model in a Hugging Face archive file and try again. ")
+        try:
+            # create a new directory with the same name as the file
+            new_dir = base
+            os.makedirs(new_dir, exist_ok=True)
+
+            # extract the compressed file
+            # If the target directory already exists, it will be overwritten by default without warning.
+            if ext == '.zip':
+                with zipfile.ZipFile(path, 'r') as zip_ref:
+                    zip_ref.extractall(new_dir)
+            else:
+                with tarfile.open(path, 'r') as tar_ref:
+                    tar_ref.extractall(new_dir)
+            # return the path to the new directory
+            return new_dir
+        except (tarfile.ReadError, zipfile.BadZipfile):
+            try:
+                os.remove(path)
+            except Exception as remove_e:
+                raise RuntimeError(
+                    f"Marqo encountered an error while attempting to delete a corrupted file `{path}`. "
+                    f"Please report this issue on Marqo's Github Repo and replace the problematic Marqo instance with "
+                    f"a new one. \n "
+                    f"Error message: `{str(remove_e)}`"
+                )
+            raise InvalidModelPropertiesError(f'Marqo encountered an error while extracting the compressed model archive from `{path}`.\n '
+                                              f'This is probably because the file is corrupted or the extension `{ext}` is not supported. '
+                                              f'Marqo has removed the corrupted file from the disk. '
+                                              f'Please ensure that the file is a valid compressed file and try again.')
+        # will this error really happen?
+        except PermissionError:
+            raise InvalidModelPropertiesError(f'Marqo encountered an error while extracting the compressed model archive from `{path}`. '
+                                              f'This is probably because Marqo does not have permission to write to the directory. '
+                                              f'Please check the access permission of Marqo and try again.')
+        except Exception as e:
+            raise RuntimeError(f'Marqo encountered an error while extracting the compressed model archive from `{path}`. '
+                               f'The original error message is `{str(e)}`')
+    else:
+        # return the directory path or repo_id directory
+        return path
\ No newline at end of file
diff --git a/src/marqo/core/inference/models/hugging_face_model_properties.py b/src/marqo/core/inference/models/hugging_face_model_properties.py
new file mode 100644
index 000000000..5f78b5611
--- /dev/null
+++ b/src/marqo/core/inference/models/hugging_face_model_properties.py
@@ -0,0 +1,103 @@
+import json
+from enum import Enum
+from json import JSONDecodeError
+from typing import Optional
+
+from huggingface_hub import hf_hub_download
+from huggingface_hub.utils import HfHubHTTPError
+from pydantic import Field, validator, root_validator
+
+from marqo.base_model import MarqoBaseModel
+from marqo.s2_inference.configs import ModelCache
+from marqo.s2_inference.logger import get_logger
+from marqo.tensor_search.models.private_models import ModelLocation, ModelAuth
+
+logger = get_logger(__name__)
+
+
+class PoolingMethod(str, Enum):
+    Mean = "mean"
+    CLS = "cls"
+
+
+class HuggingFaceModelProperties(MarqoBaseModel):
+    """
+    A class to represent the properties of a Hugging Face model.
+
+    Attributes:
+        name: The name of the model. This will be used as the repo_id in the Hugging Face model hub.
+        token: The token length of the model. It defaults to 128.
+        type: The type of the model. It should be "hf".
+        url: The URL of the model checkpoint. It is optional.
+        model_location: The location of the model. It is optional.
+        model_auth: The authentication information for the model. It is optional.
+        note: A note about the model. It is optional.
+        pooling_method: The pooling method for the model. It should be one of the values in the PoolingMethod enum.
+ """ + name: Optional[str] = None + token: int = 128 + type: str + url: Optional[str] = None + model_location: Optional[ModelLocation] = Field(default=None, alias="modelLocation") + model_auth: Optional[ModelAuth] = Field(default=None, alias="modelAuth") + note: Optional[str] = None + pooling_method: PoolingMethod = Field(default=PoolingMethod.Mean, alias="poolingMethod") + + @validator("type") + def _validate_type(cls, v): + if v != "hf": + raise ValueError("The type of the model should be 'hf'.") + return v + + @validator('pooling_method', pre=True, always=True) + def validate_or_infer_pooling_method(cls, v, values): + if v is not None: + return v + name = values.get('name') + if name and isinstance(name, str): + return cls._infer_pooling_method_from_name(name) + return PoolingMethod.Mean + + @staticmethod + def _infer_pooling_method_from_name(name: str) -> PoolingMethod: + """ + Infer the pooling method from the model name. + Args: + name: The name of the model. This is the repo_id in the Hugging Face model hub. + + Returns: + The inferred pooling method. + """ + repo_id = name + file_name = "1_Pooling/config.json" + try: + file_path = hf_hub_download(repo_id, file_name, cache_dir=ModelCache.hf_cache_path) + except HfHubHTTPError: + logger.warn(f"Could not infer pooling method from the model {name}. Defaulting to mean pooling.") + return PoolingMethod.Mean + + try: + with open(file_path, 'r') as file: + content = json.loads(file.read()) + except JSONDecodeError: + logger.warn(f"Could not infer pooling method from the model {name}. Defaulting to mean pooling.") + return PoolingMethod.Mean + + if not isinstance(content, dict): + logger.warn(f"Could not infer pooling method from the model {name}. Defaulting to mean pooling.") + return PoolingMethod.Mean + + if content.get("pooling_mode_cls_token") is True: + return PoolingMethod.CLS + elif content.get("pooling_mode_mean_tokens") is True: + return PoolingMethod.Mean + else: + logger.warn(f"Could not infer pooling method from the model {name}. 
Defaulting to mean pooling.") + return PoolingMethod.Mean + + + @root_validator(pre=True) + def _validate_url_and_model_location(cls, values): + if values.get("url") and values.get("model_location"): + raise ValueError("Only one of 'url' and 'model_location' should be provided.") + return values \ No newline at end of file diff --git a/src/marqo/s2_inference/model_registry.py b/src/marqo/s2_inference/model_registry.py index 011157419..d16d2533b 100644 --- a/src/marqo/s2_inference/model_registry.py +++ b/src/marqo/s2_inference/model_registry.py @@ -1,6 +1,7 @@ from marqo.s2_inference.clip_utils import CLIP, OPEN_CLIP, MULTILINGUAL_CLIP, FP16_CLIP, \ get_multilingual_clip_properties -from marqo.s2_inference.hf_utils import HF_MODEL +# from marqo.s2_inference.hf_utils import HF_MODEL +from marqo.core.inference.models.hugging_face_model import HuggingFaceModel from marqo.s2_inference.onnx_clip_utils import CLIP_ONNX from marqo.s2_inference.random_utils import Random from marqo.s2_inference.sbert_onnx_utils import SBERT_ONNX @@ -2057,7 +2058,7 @@ def _get_model_load_mappings() -> Dict: "multilingual_clip" : MULTILINGUAL_CLIP, "fp16_clip": FP16_CLIP, 'random':Random, - 'hf':HF_MODEL, + 'hf':HuggingFaceModel, "no_model": NO_MODEL} def load_model_properties() -> Dict: diff --git a/src/marqo/s2_inference/processing/custom_clip_utils.py b/src/marqo/s2_inference/processing/custom_clip_utils.py index 0277212aa..37428c69b 100644 --- a/src/marqo/s2_inference/processing/custom_clip_utils.py +++ b/src/marqo/s2_inference/processing/custom_clip_utils.py @@ -51,7 +51,7 @@ def download_model( url: Optional[str] = None, auth: Optional[ModelAuth] = None, download_dir: Optional[str] = None - ): + ) -> str: """Downloads a custom CLIP model. Args: @@ -91,7 +91,7 @@ def download_model( def download_pretrained_from_s3( location: S3Location, auth: Optional[S3Auth] = None, - download_dir: Optional[str] = None): + download_dir: Optional[str] = None) -> str: """Downloads a pretrained model from S3, if it doesn't exist locally. The basename of the object's key is used for the filename. @@ -128,7 +128,7 @@ def download_pretrained_from_url( url: str, cache_dir: Union[str, None] = None, cache_file_name: Optional[str] = None, -): +) -> str: ''' This function takes a clip model checkpoint url as input, downloads the model if it doesn't exist locally, and returns the local path of the downloaded file. 
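The hunk below wires the refactored class into `_load_model`'s dispatch. As a minimal usage sketch of the new 'hf' path (not a definitive API: it assumes the repo's `marqo` package is importable, that `load()` is inherited from `AbstractEmbeddingModel`, that `model_auth` may be None, and that Hugging Face is reachable over the network):

    from marqo.core.inference.models.hugging_face_model import HuggingFaceModel

    # Model properties as a user would supply them; "type" must be "hf".
    # "poolingMethod" may be omitted, in which case it is inferred from the
    # repo's 1_Pooling/config.json (falling back to mean pooling).
    model_properties = {
        "name": "sentence-transformers/all-MiniLM-L6-v2",
        "type": "hf",
        "token": 128,
    }

    model = HuggingFaceModel(model_properties=model_properties, device="cpu", model_auth=None)

    # encode() lazily calls load(), which downloads the weights and tokenizer,
    # moves the model to the device, and selects the pooling function.
    embeddings = model.encode(["hello marqo"], normalize=True)
    print(embeddings.shape)  # (1, 384) for all-MiniLM-L6-v2
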
diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index 1513323c0..6c3614a5a 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -509,7 +509,7 @@ def _load_model( # TODO For each refactored model class, add a new elif block here and remove the if block # once we have all models refactored - if model_type == ModelType.OpenCLIP: + if model_type in (ModelType.OpenCLIP, ModelType.HF_MODEL): model = loader( device = device, model_properties = model_properties, From 3b451c79d5fc921261e0b47ea31cbf81757f7d98 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 18 Sep 2024 16:42:23 +1000 Subject: [PATCH 25/63] Finish tests for test_hugging_face_model_properties --- .../inference/models/hugging_face_model.py | 32 ++---- .../models/hugging_face_model_properties.py | 38 +++++-- .../tensor_search/models/private_models.py | 45 +++++--- .../test_hugging_face_model_properties.py | 102 ++++++++++++++++++ 4 files changed, 170 insertions(+), 47 deletions(-) create mode 100644 tests/core/inference/test_hugging_face_model_properties.py diff --git a/src/marqo/core/inference/models/hugging_face_model.py b/src/marqo/core/inference/models/hugging_face_model.py index f5f0fe85e..b6054a28a 100644 --- a/src/marqo/core/inference/models/hugging_face_model.py +++ b/src/marqo/core/inference/models/hugging_face_model.py @@ -1,31 +1,21 @@ -from marqo.core.inference.models.abstract_embedding_model import AbstractEmbeddingModel -from marqo.core.inference.models.hugging_face_model_properties import HuggingFaceModelProperties, PoolingMethod -from marqo.s2_inference.errors import InvalidModelPropertiesError -from marqo import marqo_docs +import os +import tarfile +import zipfile from typing import Tuple, Callable -import os, validators -import zipfile, tarfile -from urllib.error import HTTPError + import numpy as np -from typing import Optional import torch -from torch import nn +import torch.nn.functional as F +from pydantic import ValidationError from transformers import (AutoModel, AutoTokenizer) -from marqo.s2_inference.hf_utils import AutoModelForSentenceEmbedding -from marqo.tensor_search.models.private_models import ModelLocation, ModelAuth -from marqo.tensor_search.enums import ModelProperties, InferenceParams -from marqo.s2_inference.sbert_utils import Model -from marqo.s2_inference.types import Union, FloatTensor, List -from marqo.s2_inference.logger import get_logger -from marqo.tensor_search.enums import ModelProperties -from marqo.s2_inference.errors import InvalidModelPropertiesError, ModelDownloadError +from marqo import marqo_docs from marqo.core.inference.model_download import download_model +from marqo.core.inference.models.abstract_embedding_model import AbstractEmbeddingModel +from marqo.core.inference.models.hugging_face_model_properties import HuggingFaceModelProperties, PoolingMethod from marqo.s2_inference.configs import ModelCache -from test import average_pool -import torch.nn.functional as F -from pydantic import ValidationError - +from marqo.s2_inference.errors import InvalidModelPropertiesError +from marqo.s2_inference.types import Union, FloatTensor, List def _average_pool_func(model_output, attention_mask): diff --git a/src/marqo/core/inference/models/hugging_face_model_properties.py b/src/marqo/core/inference/models/hugging_face_model_properties.py index 5f78b5611..07e9bbefb 100644 --- a/src/marqo/core/inference/models/hugging_face_model_properties.py +++ 
b/src/marqo/core/inference/models/hugging_face_model_properties.py
@@ -26,6 +26,8 @@ class HuggingFaceModelProperties(MarqoBaseModel):
 
     Attributes:
         name: The name of the model. This will be used as the repo_id in the Hugging Face model hub.
+            This attribute is ignored if 'url' or 'model_location' is provided.
+            We are not raising an error right now as that would be a breaking change.
         token: The token length of the model. It defaults to 128.
         type: The type of the model. It should be "hf".
         url: The URL of the model checkpoint. It is optional.
@@ -41,7 +43,7 @@ class HuggingFaceModelProperties(MarqoBaseModel):
     model_location: Optional[ModelLocation] = Field(default=None, alias="modelLocation")
     model_auth: Optional[ModelAuth] = Field(default=None, alias="modelAuth")
     note: Optional[str] = None
-    pooling_method: PoolingMethod = Field(default=PoolingMethod.Mean, alias="poolingMethod")
+    pooling_method: PoolingMethod = Field(..., alias="poolingMethod")
 
     @validator("type")
     def _validate_type(cls, v):
@@ -49,14 +51,22 @@ def _validate_type(cls, v):
             raise ValueError("The type of the model should be 'hf'.")
         return v
 
-    @validator('pooling_method', pre=True, always=True)
-    def validate_or_infer_pooling_method(cls, v, values):
-        if v is not None:
-            return v
+    @root_validator(pre=True, skip_on_failure=True)
+    def _validate_or_infer_pooling_method(cls, values):
+        """Infer the pooling method from the model name if it is not provided.
+
+        If the pooling method is provided, return the values as is.
+        """
+        pooling_method = values.get("pooling_method") or values.get("poolingMethod")
+        if pooling_method is not None:
+            return values
         name = values.get('name')
-        if name and isinstance(name, str):
-            return cls._infer_pooling_method_from_name(name)
-        return PoolingMethod.Mean
+        if isinstance(name, str) and name:
+            pooling_method = cls._infer_pooling_method_from_name(name)
+        else:
+            pooling_method = PoolingMethod.Mean
+        values["pooling_method"] = pooling_method
+        return values
 
     @staticmethod
     def _infer_pooling_method_from_name(name: str) -> PoolingMethod:
@@ -95,9 +105,15 @@ def _infer_pooling_method_from_name(name: str) -> PoolingMethod:
             logger.warn(f"Could not infer pooling method from the model {name}. Defaulting to mean pooling.")
             return PoolingMethod.Mean
 
-
-    @root_validator(pre=True)
-    def _validate_url_and_model_location(cls, values):
+    @root_validator(skip_on_failure=True)
+    def _validate_minimum_required_fields_to_load(cls, values):
+        """
+        Validate that at least one of 'name', 'url', or 'model_location' is provided.
+        But 'url' and 'model_location' should not be provided together.
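+
+        For example (a sketch mirroring the accompanying tests):
+        HuggingFaceModelProperties(type="hf") raises a ValidationError, while
+        HuggingFaceModelProperties(name="test-model", type="hf") is valid.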
+ """ if values.get("url") and values.get("model_location"): raise ValueError("Only one of 'url' and 'model_location' should be provided.") + is_custom = values.get("url") or values.get("model_location") + if not values.get("name") and not is_custom: + raise ValueError("At least one of 'name', 'url', or 'model_location' should be provided.") return values \ No newline at end of file diff --git a/src/marqo/tensor_search/models/private_models.py b/src/marqo/tensor_search/models/private_models.py index 32927df44..429f92fa3 100644 --- a/src/marqo/tensor_search/models/private_models.py +++ b/src/marqo/tensor_search/models/private_models.py @@ -4,7 +4,7 @@ """ from marqo.tensor_search.models.external_apis.hf import HfAuth, HfModelLocation from marqo.tensor_search.models.external_apis.s3 import S3Auth, S3Location -from pydantic import BaseModel, validator +from pydantic import BaseModel, validator, root_validator from marqo.api.exceptions import InvalidArgError from typing import Optional from pydantic import Field @@ -35,29 +35,44 @@ def _ensure_exactly_one_auth_method(cls, v, values, field): return v -class ModelLocation(ImmutableBaseModel): +class ModelLocation(BaseModel): class Config: - allow_mutation = False + allow_mutation = True s3: Optional[S3Location] = None hf: Optional[HfModelLocation] = None auth_required: bool = Field(default=False, alias="authRequired") - @validator('s3', 'hf', pre=True, always=True) - def _ensure_exactly_one_location(cls, v, values, field): - """TODO: insert links to docs in error message""" - other_field = 's3' if field.name == 'hf' else 'hf' - if other_field in values and values[other_field] is not None and v is not None: - raise InvalidArgError( - "More than one model location object was provided. " - "Only one model authentication object is allowed") - return v - def __init__(self, **data): - super().__init__(**data) - if self.s3 is None and self.hf is None: + @root_validator(skip_on_failure=True) + def _validate_s3_and_hf(cls, values): + s3 = values.get('s3') + hf = values.get('hf') + if s3 is None and hf is None: raise InvalidArgError( "Missing model location object. A location object, for example `s3` or " "`hf`, must be provided. ") + if s3 is not None and hf is not None: + raise InvalidArgError( + "More than one model location object was provided. " + "Only one model location object is allowed") + return values + # + # @validator('s3', 'hf', pre=True, always=True) + # def _ensure_exactly_one_location(cls, v, values, field): + # """TODO: insert links to docs in error message""" + # other_field = 's3' if field.name == 'hf' else 'hf' + # if other_field in values and values[other_field] is not None and v is not None: + # raise InvalidArgError( + # "More than one model location object was provided. " + # "Only one model authentication object is allowed") + # return v + # + # def __init__(self, **data): + # super().__init__(**data) + # if self.s3 is None and self.hf is None: + # raise InvalidArgError( + # "Missing model location object. A location object, for example `s3` or " + # "`hf`, must be provided. 
") diff --git a/tests/core/inference/test_hugging_face_model_properties.py b/tests/core/inference/test_hugging_face_model_properties.py new file mode 100644 index 000000000..0874986c5 --- /dev/null +++ b/tests/core/inference/test_hugging_face_model_properties.py @@ -0,0 +1,102 @@ +import unittest +from marqo.core.inference.models.hugging_face_model_properties import HuggingFaceModelProperties, PoolingMethod +from pydantic import ValidationError +from marqo.tensor_search.models.private_models import ModelLocation, ModelAuth +from marqo.tensor_search.models.external_apis.hf import HfModelLocation +from unittest import mock + + +class TestHuggingFaceModelProperties(unittest.TestCase): + + def test_valid_model_with_mandatory_fields(self): + model = HuggingFaceModelProperties(name="test-model", type="hf") + self.assertEqual(model.name, "test-model") + self.assertEqual(model.token, 128) + self.assertEqual(model.type, "hf") + self.assertEqual(model.pooling_method, PoolingMethod.Mean) + + def test_invalid_type(self): + with self.assertRaises(ValidationError) as excinfo: + HuggingFaceModelProperties(name="test-model", type="invalid_type") + self.assertIn("The type of the model should be 'hf'", str(excinfo.exception)) + + def test_valid_model_with_url(self): + model = HuggingFaceModelProperties(name="test-model", type="hf", url="http://example.com") + self.assertEqual(model.url, "http://example.com") + self.assertIsNone(model.model_location) + + def test_valid_model_with_model_location(self): + model_location = ModelLocation(hf=HfModelLocation(repo_id="test-repo-id", filename="test-filename")) + model = HuggingFaceModelProperties(name="test-model", type="hf", model_location=model_location) + self.assertEqual(model.model_location, model_location) + self.assertIsNone(model.url) + + def test_invalid_model_with_url_and_model_location(self): + with self.assertRaises(ValidationError) as excinfo: + HuggingFaceModelProperties( + name="test-model", type="hf", + url="http://example.com", + model_location=ModelLocation(hf=HfModelLocation(repo_id="test-repo-id", filename="test-filename")) + ) + self.assertIn("Only one of 'url' and 'model_location' should be provided.", str(excinfo.exception)) + + # Test for pooling method inference + def test_infer_pooling_method(self): + for pooling_method in (PoolingMethod.Mean, PoolingMethod.CLS): + with self.subTest(f"Pooling method inferred from name with {pooling_method}"): + with mock.patch("marqo.core.inference.models.hugging_face_model_properties." 
+ "HuggingFaceModelProperties._infer_pooling_method_from_name", + return_value = pooling_method) as mock_infer: + model = HuggingFaceModelProperties(name="model-with-cls", type="hf") + mock_infer.assert_called_once() + self.assertEqual(pooling_method, model.pooling_method) + + def test_explicit_valid_pooling_method(self): + model = HuggingFaceModelProperties(name="test-model", type="hf", pooling_method=PoolingMethod.CLS) + self.assertEqual(model.pooling_method, PoolingMethod.CLS) + + def test_explicit_invalid_pooling_method(self): + with self.assertRaises(ValidationError) as excinfo: + _ = HuggingFaceModelProperties(name="test-model", type="hf", pooling_method="invalid") + self.assertIn("value is not a valid enumeration member; permitted: 'mean', 'cls'", + str(excinfo.exception)) + + def test_model_without_optional_fields(self): + model = HuggingFaceModelProperties(name="test-model", type="hf") + self.assertIsNone(model.url) + self.assertIsNone(model.model_location) + self.assertIsNone(model.model_auth) + self.assertIsNone(model.note) + self.assertEqual(model.pooling_method, PoolingMethod.Mean) + + def test_invalid_model_without_minimum_fields(self): + with self.assertRaises(ValidationError) as excinfo: + HuggingFaceModelProperties(type="hf") + self.assertIn("At least one of 'name', 'url', or 'model_location' should be provided.", str(excinfo.exception)) + + def test_invalid_model_with_both_url_and_model_location(self): + model_location = ModelLocation(hf=HfModelLocation(repo_id="test-repo-id", filename="test-filename")) + with self.assertRaises(ValidationError) as excinfo: + HuggingFaceModelProperties(url="http://example.com", model_location=model_location, type="hf") + self.assertIn("Only one of 'url' and 'model_location' should be provided.", str(excinfo.exception)) + + def test_valid_model_with_custom_url_and_inferred_pooling(self): + model = HuggingFaceModelProperties(url="http://example.com", type="hf", pooling_method=None) + self.assertEqual(model.pooling_method, PoolingMethod.Mean) + + def test_some_pooling_method_infer_on_real_model(self): + test_cases = [ + ("intfloat/e5-base-v2", PoolingMethod.Mean), + ("sentence-transformers/all-MiniLM-L6-v2", PoolingMethod.Mean), + ("sentence-transformers/paraphrase-MiniLM-L3-v2", PoolingMethod.Mean), + ("sentence-transformers/all-mpnet-base-v2", PoolingMethod.Mean), + ("sentence-transformers/bert-base-nli-mean-tokens", PoolingMethod.Mean), + + ("sentence-transformers/nli-bert-base-cls-pooling", PoolingMethod.CLS), + ("sentence-transformers/nli-bert-large-cls-pooling", PoolingMethod.CLS), + ] + + for model_name, pooling_method in test_cases: + with self.subTest(f"Pooling method inferred from name with {model_name}"): + model = HuggingFaceModelProperties(name=model_name, type="hf") + self.assertEqual(pooling_method, model.pooling_method) \ No newline at end of file From 86dc3bc54d299d26bb149aa472b240d4adf86ddf Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 20 Sep 2024 14:22:36 +1000 Subject: [PATCH 26/63] Add tests for new hugging face module --- requirements.dev.txt | 2 +- .../inference/models/hugging_face_model.py | 12 +- .../models/hugging_face_model_properties.py | 4 +- .../models/open_clip_model_properties.py | 1 + src/marqo/s2_inference/hf_utils.py | 514 ++++++------- .../core/inference/test_hugging_face_model.py | 710 ++++++++++++++++++ .../test_hugging_face_model_properties.py | 13 +- 7 files changed, 989 insertions(+), 267 deletions(-) create mode 100644 tests/core/inference/test_hugging_face_model.py diff --git 
a/requirements.dev.txt b/requirements.dev.txt index 6f480581c..0b8f2512e 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -29,7 +29,7 @@ Pillow==9.3.0 numpy==1.23.4 validators==0.20.0 sentence-transformers==2.2.2 -open_clip_torch==2.26.1 +open_clip_torch==2.24.0 clip-marqo==1.0.2 onnx protobuf diff --git a/src/marqo/core/inference/models/hugging_face_model.py b/src/marqo/core/inference/models/hugging_face_model.py index b6054a28a..7eb99989b 100644 --- a/src/marqo/core/inference/models/hugging_face_model.py +++ b/src/marqo/core/inference/models/hugging_face_model.py @@ -69,8 +69,14 @@ def _load_necessary_components(self): def _load_from_hugging_face_repo(self) -> Tuple: """Load the model from the Hugging Face model hub based on the repo_id.""" - model = AutoModel.from_pretrained(self.model_properties.name) - tokenizer = AutoTokenizer.from_pretrained(self.model_properties.name) + try: + model = AutoModel.from_pretrained(self.model_properties.name) + tokenizer = AutoTokenizer.from_pretrained(self.model_properties.name) + except (OSError, ValueError, RuntimeError) as e: + raise InvalidModelPropertiesError( + f"Marqo encountered an error loading the Hugging Face model, modelProperties={self.model_properties}. " + f"Please ensure that the model is a valid Hugging Face model and retry.\n" + f" Original error message = {e}") from e return model, tokenizer def _load_from_zip_file(self) -> Tuple: @@ -89,7 +95,7 @@ def _load_from_zip_file(self) -> Tuple: except (OSError, ValueError, RuntimeError) as e: raise InvalidModelPropertiesError( f"Marqo encountered an error loading the Hugging Face model, modelProperties={self.model_properties}. " - f"Please ensure that the model is a valid Hugging Face model and retry.\n" + f"Please ensure that the provided zip file is valid.\n" f" Original error message = {e}") from e return model, tokenizer diff --git a/src/marqo/core/inference/models/hugging_face_model_properties.py b/src/marqo/core/inference/models/hugging_face_model_properties.py index 07e9bbefb..925a251c6 100644 --- a/src/marqo/core/inference/models/hugging_face_model_properties.py +++ b/src/marqo/core/inference/models/hugging_face_model_properties.py @@ -53,9 +53,9 @@ def _validate_type(cls, v): @root_validator(pre=True, skip_on_failure=True) def _validate_or_infer_pooling_method(cls, values): - """Infer the pooling method from the model name if it is not provided. + """Infer the pooling_method from the model name if it is not provided. - If the pooling method is provided, return the values as is. + If the pooling_method is provided, return the values as is. """ pooling_method = values.get("pooling_method") or values.get("poolingMethod") if pooling_method is not None: diff --git a/src/marqo/core/inference/models/open_clip_model_properties.py b/src/marqo/core/inference/models/open_clip_model_properties.py index 6e97059a6..7b1d44d01 100644 --- a/src/marqo/core/inference/models/open_clip_model_properties.py +++ b/src/marqo/core/inference/models/open_clip_model_properties.py @@ -45,6 +45,7 @@ class OpenCLIPModelProperties(MarqoBaseModel): size: The size of the image. It is optional. If provided, it will override the default size of the image. note: A note about the model. It is optional. pretrained: The name of the pretrained model. It is optional. 
+ """ name: str type: str diff --git a/src/marqo/s2_inference/hf_utils.py b/src/marqo/s2_inference/hf_utils.py index 3adcc58ac..8eeadcbf4 100644 --- a/src/marqo/s2_inference/hf_utils.py +++ b/src/marqo/s2_inference/hf_utils.py @@ -1,257 +1,257 @@ -import os, validators -import zipfile, tarfile -from urllib.error import HTTPError -import numpy as np -from typing import Optional -import torch -from torch import nn -from transformers import (AutoModel, AutoTokenizer) -from marqo.tensor_search.models.private_models import ModelLocation, ModelAuth -from marqo.tensor_search.enums import ModelProperties, InferenceParams -from marqo.s2_inference.sbert_utils import Model -from marqo.s2_inference.types import Union, FloatTensor, List -from marqo.s2_inference.logger import get_logger -from marqo.tensor_search.enums import ModelProperties -from marqo.s2_inference.errors import InvalidModelPropertiesError, ModelDownloadError -from marqo.s2_inference.processing.custom_clip_utils import download_model -from marqo.s2_inference.configs import ModelCache - - - -logger = get_logger(__name__) - - -class HF_MODEL(Model): - - def __init__(self, *args, **kwargs) -> None: - super().__init__(*args, **kwargs) - - if self.max_seq_length is None: - self.max_seq_length = 128 - self.model_properties = kwargs.get("model_properties", dict()) - self.model_name = self.model_properties.get("name", None) - self.model_auth = kwargs.get(InferenceParams.model_auth, None) - - def load(self) -> None: - - model_location_presence = ModelProperties.model_location in self.model_properties - path = self.model_properties.get("localpath", None) or self.model_properties.get("url", None) - # HF models can be loaded from 3 entries: path (url or localpath), model_name, or model_location - if (path is not None) + (self.model_name is not None) + (model_location_presence is True) != 1: - raise InvalidModelPropertiesError("Exactly one of (`localpath`/`url`) or `model_location`, `name` can be specified" - " in `model_properties` for `hf` models as they conflict with each other in model loading." 
- " Please ensure that exactly one of these is specified in `model_properties` and retry.") - elif path is not None: - if validators.url(path) is True: - self.model_path = download_model(url = path, download_dir=ModelCache.hf_cache_path) - elif os.path.isdir(path) or os.path.isfile(path): - self.model_path = path - elif self.model_name is not None: - # Loading from structured huggingface repo directly, token is required directly - self.model_path = self.model_name - elif model_location_presence is True: - # This is a special case for huggingface models, where we can load a model directory from a repo - if ("hf" in self.model_properties["model_location"]) and ("repo_id" in self.model_properties["model_location"]["hf"]) and \ - ("filename" not in self.model_properties["model_location"]["hf"]): - return self._load_from_private_hf_repo() - else: - self.model_path = self._download_from_repo() - - # We need to do extraction here if necessary - self.model_path = extract_huggingface_archive(self.model_path) - - self.model = AutoModelForSentenceEmbedding(self.model_path).to(self.device) - try: - self.tokenizer = AutoTokenizer.from_pretrained(self.model_path) - except (OSError, ValueError, RuntimeError) as e: - raise InvalidModelPropertiesError( - f"Marqo encountered an error loading the Hugging Face model = `{self.model_path}` using AutoTokenizer " - f"Please ensure that the model is a valid Hugging Face model and retry.\n" - f" Original error message = {e}") - except (HTTPError, ConnectionError) as e: - raise ModelDownloadError( - f"Marqo encountered an ConnectionError loading the Hugging Face model = `{self.model_path}` using AutoTokenizer. " - f"This is likely to be caused by an internet issue. Please check Marqo's internet connection to Hugging Face and retry. \n" - f" Original error message = {e}") - - def _load_from_private_hf_repo(self) -> None: - """ - Load a private model from a huggingface repo directly using the `repo_id` attribute in `model_properties` - This is a special case for HF models, where we can load a model directory from a repo. - The self.model_path will be set to the repo_id, which is the remote path in the HuggingFace repo. - Token is also used if provided in `model_auth` object. - """ - model_location = ModelLocation(**self.model_properties[ModelProperties.model_location]) - self.model_path = model_location.hf.repo_id - - token = None - if model_location.auth_required: - try: - token = self.model_auth.hf.token - except AttributeError: - raise InvalidModelPropertiesError("Please ensure that `model_auth` is valid for a private Hugging Face model and retry. " - "A valid `ModelAuth` object should consist a `hugging face token` attribute for private hf repo models") - - self.model = AutoModelForSentenceEmbedding(model_name=self.model_path, use_auth_token=token).to(self.device) - try: - self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, use_auth_token=token) - except (OSError, ValueError, RuntimeError) as e: - raise InvalidModelPropertiesError(f"Marqo encounterend an error loading the Hugging Face model = `{self.model_path}` using AutoTokenizer " - f"Please ensure that the model is a valid Hugging Face model, the token is correct, and retry\n" - f" Original error message = {e}") - except (HTTPError, ConnectionError) as e: - raise ModelDownloadError(f"Marqo encounters ConnectionError loading the Hugging Face model = `{self.model_path}` using AutoTokenizer. " - f"This is likely to be caused by an internet issue. 
Please check Marqo's internet connection to Hugging Face and retry. \n" - f" Original error message = {e}") - - def _download_from_repo(self) -> str: - """Downloads model from an external repo like s3 and returns the filepath - - Returns: - The model's filepath or a string of hugging face repo name - - Raises: - RunTimeError if an empty filepath is detected. - """ - model_location = ModelLocation(**self.model_properties[ModelProperties.model_location]) - download_model_params = {"repo_location": model_location} - - if model_location.auth_required: - download_model_params['auth'] = self.model_auth - - model_file_path = download_model(**download_model_params, download_dir=ModelCache.hf_cache_path) - if model_file_path is None or model_file_path == '': - raise RuntimeError( - 'download_model() needs to return a valid filepath to the model! Instead, received ' - f' filepath `{model_file_path}`') - return model_file_path - - def encode(self, sentence: Union[str, List[str]], normalize=True, **kwargs) -> Union[FloatTensor, np.ndarray]: - - if isinstance(sentence, str): - sentence = [sentence] - - if self.model is None: - self.load() - - self.model.normalize = normalize - inputs = self.tokenizer(sentence, padding=True, truncation=True, max_length=self.max_seq_length, - return_tensors="pt").to(self.device) - - with torch.no_grad(): - return self._convert_output(self.model.forward(**inputs)) - - def _convert_output(self, output): - if self.device == 'cpu': - return output.numpy() - elif self.device.startswith('cuda'): - return output.cpu().numpy() - - -class AutoModelForSentenceEmbedding(nn.Module): - - def __init__(self, model_name: Optional[str] = None, use_auth_token: Optional[str] = None, normalize=True, pooling='mean'): - super().__init__() - self.model_name = model_name - self.normalize = normalize - self.pooling = pooling - try: - self.model = AutoModel.from_pretrained(model_name, use_auth_token = use_auth_token, cache_dir=ModelCache.hf_cache_path) - except (OSError, ValueError, RuntimeError) as e: - raise InvalidModelPropertiesError( - f"Marqo encounters error loading the Hugging Face model = `{self.model_path}` using AutoModel " - f"Please ensure that the model is a valid Hugging Face model and retry.\n" - f" Original error message = {e}") - except (HTTPError, ConnectionError) as e: - raise ModelDownloadError( - f"Marqo encounters ConnectionError loading the Hugging Face model = `{self.model_path}` using AutoModel. " - f"This is likely to be caused by an internet issue. Please check Marqo's internet connection with Hugging Face and retry. 
\n" - f" Original error message = {e}") - self.model.eval() - if self.pooling == 'mean': - self._pool_func = self.mean_pooling - elif self.pooling == 'cls': - self._pool_func = self.cls_pooling - else: - raise TypeError(f"{pooling} not in allowed pooling types of 'mean' or 'cls' ") - - def forward(self, **kwargs): - - model_output = self.model(**kwargs) - - embeddings = self._pool_func(model_output, kwargs['attention_mask']) - - if self.normalize: - return nn.functional.normalize(embeddings, p=2, dim=1) - - return embeddings - - def mean_pooling(self, model_output, attention_mask): - - token_embeddings = model_output[0] - - input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() - - return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9) - - def cls_pooling(self, model_output, attention_mask): - return model_output[0][:, 0] - - -def extract_huggingface_archive(path: str) -> str: - ''' - - This function takes the path as input. The path can must be a string that can be: - 1. A downloaded archive file. This function will extract the model from the archive return the directory path. - 2. A repo_id in huggingface. This function will return the input string directly. - - path: the downloaded model archive path or a repo_id in huggingface - Returns: - The directory path to the model or the repo_id in huggingface - ''' - if os.path.isfile(path): - # if it's a file, check if it's a compressed file - base, ext = os.path.splitext(path) - if ext in ['.bin', '.pt']: - raise InvalidModelPropertiesError(f"Marqo does not support loading Hugging Face SBERT models from the provided single `{ext}` file. " - "Please try to wrap the model in a Hugging Face archive file and try again. ") - try: - # create a new directory with the same name as the file - new_dir = base - os.makedirs(new_dir, exist_ok=True) - - # extract the compressed file - # If the target directory already exists, it will be overwritten by default without warning. - if ext == '.zip': - with zipfile.ZipFile(path, 'r') as zip_ref: - zip_ref.extractall(new_dir) - else: - with tarfile.open(path, 'r') as tar_ref: - tar_ref.extractall(new_dir) - # return the path to the new directory - return new_dir - except (tarfile.ReadError, zipfile.BadZipfile): - try: - os.remove(path) - except Exception as remove_e: - raise RuntimeError( - f"Marqo encountered an error while attempting to delete a corrupted file `{path}`. " - f"Please report this issue on Marqo's Github Repo and replace the problematic Marqo instance with " - f"a new one. \n " - f"Error message: `{str(remove_e)}`" - ) - raise InvalidModelPropertiesError(f'Marqo encountered an error while extracting the compressed model archive from `{path}`.\n ' - f'This is probably because the file is corrupted or the extension `{ext}` is not supported. ' - f'Marqo has removed the corrupted file from the disk.' - f'Please ensure that the file is a valid compressed file and try again.') - # will this error really happen? - except PermissionError: - raise InvalidModelPropertiesError(f'Marqo encountered an error while extracting the compressed model archive from `{path}`. ' - f'This is probably because the Marqo does not have the permission to write to the directory. ' - f'Please check the access permission of Marqo and try again.') - except Exception as e: - raise RuntimeError(f'Marqo encountered an error while extracting the compressed model archive from `{path}`. 
' - f'The original error message is `{str(e)}`') - else: - # return the directory path or repo_id directory - return path +# import os, validators +# import zipfile, tarfile +# from urllib.error import HTTPError +# import numpy as np +# from typing import Optional +# import torch +# from torch import nn +# from transformers import (AutoModel, AutoTokenizer) +# from marqo.tensor_search.models.private_models import ModelLocation, ModelAuth +# from marqo.tensor_search.enums import ModelProperties, InferenceParams +# from marqo.s2_inference.sbert_utils import Model +# from marqo.s2_inference.types import Union, FloatTensor, List +# from marqo.s2_inference.logger import get_logger +# from marqo.tensor_search.enums import ModelProperties +# from marqo.s2_inference.errors import InvalidModelPropertiesError, ModelDownloadError +# from marqo.s2_inference.processing.custom_clip_utils import download_model +# from marqo.s2_inference.configs import ModelCache +# +# +# +# logger = get_logger(__name__) +# +# +# class HF_MODEL(Model): +# +# def __init__(self, *args, **kwargs) -> None: +# super().__init__(*args, **kwargs) +# +# if self.max_seq_length is None: +# self.max_seq_length = 128 +# self.model_properties = kwargs.get("model_properties", dict()) +# self.model_name = self.model_properties.get("name", None) +# self.model_auth = kwargs.get(InferenceParams.model_auth, None) +# +# def load(self) -> None: +# +# model_location_presence = ModelProperties.model_location in self.model_properties +# path = self.model_properties.get("localpath", None) or self.model_properties.get("url", None) +# # HF models can be loaded from 3 entries: path (url or localpath), model_name, or model_location +# if (path is not None) + (self.model_name is not None) + (model_location_presence is True) != 1: +# raise InvalidModelPropertiesError("Exactly one of (`localpath`/`url`) or `model_location`, `name` can be specified" +# " in `model_properties` for `hf` models as they conflict with each other in model loading." 
+# " Please ensure that exactly one of these is specified in `model_properties` and retry.") +# elif path is not None: +# if validators.url(path) is True: +# self.model_path = download_model(url = path, download_dir=ModelCache.hf_cache_path) +# elif os.path.isdir(path) or os.path.isfile(path): +# self.model_path = path +# elif self.model_name is not None: +# # Loading from structured huggingface repo directly, token is required directly +# self.model_path = self.model_name +# elif model_location_presence is True: +# # This is a special case for huggingface models, where we can load a model directory from a repo +# if ("hf" in self.model_properties["model_location"]) and ("repo_id" in self.model_properties["model_location"]["hf"]) and \ +# ("filename" not in self.model_properties["model_location"]["hf"]): +# return self._load_from_private_hf_repo() +# else: +# self.model_path = self._download_from_repo() +# +# # We need to do extraction here if necessary +# self.model_path = extract_huggingface_archive(self.model_path) +# +# self.model = AutoModelForSentenceEmbedding(self.model_path).to(self.device) +# try: +# self.tokenizer = AutoTokenizer.from_pretrained(self.model_path) +# except (OSError, ValueError, RuntimeError) as e: +# raise InvalidModelPropertiesError( +# f"Marqo encountered an error loading the Hugging Face model = `{self.model_path}` using AutoTokenizer " +# f"Please ensure that the model is a valid Hugging Face model and retry.\n" +# f" Original error message = {e}") +# except (HTTPError, ConnectionError) as e: +# raise ModelDownloadError( +# f"Marqo encountered an ConnectionError loading the Hugging Face model = `{self.model_path}` using AutoTokenizer. " +# f"This is likely to be caused by an internet issue. Please check Marqo's internet connection to Hugging Face and retry. \n" +# f" Original error message = {e}") +# +# def _load_from_private_hf_repo(self) -> None: +# """ +# Load a private model from a huggingface repo directly using the `repo_id` attribute in `model_properties` +# This is a special case for HF models, where we can load a model directory from a repo. +# The self.model_path will be set to the repo_id, which is the remote path in the HuggingFace repo. +# Token is also used if provided in `model_auth` object. +# """ +# model_location = ModelLocation(**self.model_properties[ModelProperties.model_location]) +# self.model_path = model_location.hf.repo_id +# +# token = None +# if model_location.auth_required: +# try: +# token = self.model_auth.hf.token +# except AttributeError: +# raise InvalidModelPropertiesError("Please ensure that `model_auth` is valid for a private Hugging Face model and retry. " +# "A valid `ModelAuth` object should consist a `hugging face token` attribute for private hf repo models") +# +# self.model = AutoModelForSentenceEmbedding(model_name=self.model_path, use_auth_token=token).to(self.device) +# try: +# self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, use_auth_token=token) +# except (OSError, ValueError, RuntimeError) as e: +# raise InvalidModelPropertiesError(f"Marqo encounterend an error loading the Hugging Face model = `{self.model_path}` using AutoTokenizer " +# f"Please ensure that the model is a valid Hugging Face model, the token is correct, and retry\n" +# f" Original error message = {e}") +# except (HTTPError, ConnectionError) as e: +# raise ModelDownloadError(f"Marqo encounters ConnectionError loading the Hugging Face model = `{self.model_path}` using AutoTokenizer. 
" +# f"This is likely to be caused by an internet issue. Please check Marqo's internet connection to Hugging Face and retry. \n" +# f" Original error message = {e}") +# +# def _download_from_repo(self) -> str: +# """Downloads model from an external repo like s3 and returns the filepath +# +# Returns: +# The model's filepath or a string of hugging face repo name +# +# Raises: +# RunTimeError if an empty filepath is detected. +# """ +# model_location = ModelLocation(**self.model_properties[ModelProperties.model_location]) +# download_model_params = {"repo_location": model_location} +# +# if model_location.auth_required: +# download_model_params['auth'] = self.model_auth +# +# model_file_path = download_model(**download_model_params, download_dir=ModelCache.hf_cache_path) +# if model_file_path is None or model_file_path == '': +# raise RuntimeError( +# 'download_model() needs to return a valid filepath to the model! Instead, received ' +# f' filepath `{model_file_path}`') +# return model_file_path +# +# def encode(self, sentence: Union[str, List[str]], normalize=True, **kwargs) -> Union[FloatTensor, np.ndarray]: +# +# if isinstance(sentence, str): +# sentence = [sentence] +# +# if self.model is None: +# self.load() +# +# self.model.normalize = normalize +# inputs = self.tokenizer(sentence, padding=True, truncation=True, max_length=self.max_seq_length, +# return_tensors="pt").to(self.device) +# +# with torch.no_grad(): +# return self._convert_output(self.model.forward(**inputs)) +# +# def _convert_output(self, output): +# if self.device == 'cpu': +# return output.numpy() +# elif self.device.startswith('cuda'): +# return output.cpu().numpy() +# +# +# class AutoModelForSentenceEmbedding(nn.Module): +# +# def __init__(self, model_name: Optional[str] = None, use_auth_token: Optional[str] = None, normalize=True, pooling='mean'): +# super().__init__() +# self.model_name = model_name +# self.normalize = normalize +# self.pooling = pooling +# try: +# self.model = AutoModel.from_pretrained(model_name, use_auth_token = use_auth_token, cache_dir=ModelCache.hf_cache_path) +# except (OSError, ValueError, RuntimeError) as e: +# raise InvalidModelPropertiesError( +# f"Marqo encounters error loading the Hugging Face model = `{self.model_path}` using AutoModel " +# f"Please ensure that the model is a valid Hugging Face model and retry.\n" +# f" Original error message = {e}") +# except (HTTPError, ConnectionError) as e: +# raise ModelDownloadError( +# f"Marqo encounters ConnectionError loading the Hugging Face model = `{self.model_path}` using AutoModel. " +# f"This is likely to be caused by an internet issue. Please check Marqo's internet connection with Hugging Face and retry. 
\n" +# f" Original error message = {e}") +# self.model.eval() +# if self.pooling == 'mean': +# self._pool_func = self.mean_pooling +# elif self.pooling == 'cls': +# self._pool_func = self.cls_pooling +# else: +# raise TypeError(f"{pooling} not in allowed pooling types of 'mean' or 'cls' ") +# +# def forward(self, **kwargs): +# +# model_output = self.model(**kwargs) +# +# embeddings = self._pool_func(model_output, kwargs['attention_mask']) +# +# if self.normalize: +# return nn.functional.normalize(embeddings, p=2, dim=1) +# +# return embeddings +# +# def mean_pooling(self, model_output, attention_mask): +# +# token_embeddings = model_output[0] +# +# input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() +# +# return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9) +# +# def cls_pooling(self, model_output, attention_mask): +# return model_output[0][:, 0] +# +# +# def extract_huggingface_archive(path: str) -> str: +# ''' +# +# This function takes the path as input. The path can must be a string that can be: +# 1. A downloaded archive file. This function will extract the model from the archive return the directory path. +# 2. A repo_id in huggingface. This function will return the input string directly. +# +# path: the downloaded model archive path or a repo_id in huggingface +# Returns: +# The directory path to the model or the repo_id in huggingface +# ''' +# if os.path.isfile(path): +# # if it's a file, check if it's a compressed file +# base, ext = os.path.splitext(path) +# if ext in ['.bin', '.pt']: +# raise InvalidModelPropertiesError(f"Marqo does not support loading Hugging Face SBERT models from the provided single `{ext}` file. " +# "Please try to wrap the model in a Hugging Face archive file and try again. ") +# try: +# # create a new directory with the same name as the file +# new_dir = base +# os.makedirs(new_dir, exist_ok=True) +# +# # extract the compressed file +# # If the target directory already exists, it will be overwritten by default without warning. +# if ext == '.zip': +# with zipfile.ZipFile(path, 'r') as zip_ref: +# zip_ref.extractall(new_dir) +# else: +# with tarfile.open(path, 'r') as tar_ref: +# tar_ref.extractall(new_dir) +# # return the path to the new directory +# return new_dir +# except (tarfile.ReadError, zipfile.BadZipfile): +# try: +# os.remove(path) +# except Exception as remove_e: +# raise RuntimeError( +# f"Marqo encountered an error while attempting to delete a corrupted file `{path}`. " +# f"Please report this issue on Marqo's Github Repo and replace the problematic Marqo instance with " +# f"a new one. \n " +# f"Error message: `{str(remove_e)}`" +# ) +# raise InvalidModelPropertiesError(f'Marqo encountered an error while extracting the compressed model archive from `{path}`.\n ' +# f'This is probably because the file is corrupted or the extension `{ext}` is not supported. ' +# f'Marqo has removed the corrupted file from the disk.' +# f'Please ensure that the file is a valid compressed file and try again.') +# # will this error really happen? +# except PermissionError: +# raise InvalidModelPropertiesError(f'Marqo encountered an error while extracting the compressed model archive from `{path}`. ' +# f'This is probably because the Marqo does not have the permission to write to the directory. 
' +# f'Please check the access permission of Marqo and try again.') +# except Exception as e: +# raise RuntimeError(f'Marqo encountered an error while extracting the compressed model archive from `{path}`. ' +# f'The original error message is `{str(e)}`') +# else: +# # return the directory path or repo_id directory +# return path diff --git a/tests/core/inference/test_hugging_face_model.py b/tests/core/inference/test_hugging_face_model.py new file mode 100644 index 000000000..d674160a5 --- /dev/null +++ b/tests/core/inference/test_hugging_face_model.py @@ -0,0 +1,710 @@ +import unittest +from unittest import mock + +from pydantic import ValidationError + +from marqo.core.inference.models.hugging_face_model_properties import HuggingFaceModelProperties, PoolingMethod +from marqo.tensor_search.models.external_apis.hf import HfModelLocation +from marqo.tensor_search.models.private_models import ModelLocation +from marqo.core.inference.models.hugging_face_model import HuggingFaceModel +from marqo.s2_inference.errors import InvalidModelPropertiesError +import numpy as np + + +class TestHuggingFaceModel(unittest.TestCase): + """Test initializing the HuggingFaceModel with valid properties.""" + + E5_BASE_V2_MODEL_EMBEDDINGS = np.squeeze( + np.array( + [[-1.53993233e-03, -4.38184328e-02, -7.15491399e-02, + -2.21859273e-02, 4.31589559e-02, -3.24265547e-02, + 5.12144640e-02, 2.80460659e-02, -5.22950180e-02, + 3.33100627e-03, 2.68442389e-02, 6.07045442e-02, + -3.73158492e-02, -3.92404646e-02, -5.23854904e-02, + 6.08786643e-02, 3.59617919e-02, -9.29735659e-04, + 5.42329103e-02, 3.23522440e-03, -4.17569168e-02, + -8.93813092e-03, 5.63500077e-02, -4.33811126e-03, + 1.49898147e-02, -6.72923215e-03, -1.24906814e-02, + 4.00044471e-02, -1.34475715e-02, -4.94214892e-02, + 4.12338413e-03, 1.44163240e-02, 2.67091561e-02, + -2.94547062e-02, -8.89921933e-03, 3.08779106e-02, + -2.23634876e-02, -3.13180871e-02, -2.51241550e-02, + -1.18573848e-02, -6.21171817e-02, -4.05875742e-02, + -3.85968015e-02, 4.63903360e-02, -2.54101362e-02, + -8.48237984e-03, -9.64764564e-04, 4.94192503e-02, + -2.70866305e-02, -5.71681075e-02, -5.17644547e-02, + 1.92878135e-02, 5.64169362e-02, -1.24792038e-02, + -1.77764595e-02, 2.07304321e-02, -3.91926942e-03, + -3.22796144e-02, -6.38422742e-02, -2.54739840e-02, + 5.17578870e-02, -4.11305763e-03, 2.12824978e-02, + -8.28473177e-03, 4.98436298e-03, 4.76504155e-02, + 4.60972171e-03, 3.73470760e-03, -4.46945950e-02, + 1.06849857e-05, -5.31281054e-04, -7.42445840e-03, + -2.32021697e-02, -2.89897551e-03, -1.08536454e-02, + -3.90633047e-02, 1.87121518e-02, 6.75488869e-03, + 6.21497333e-02, 2.02559773e-02, 3.51797265e-04, + -3.93331647e-02, -4.08418151e-03, 2.20038239e-02, + 4.43556532e-02, -1.62268840e-02, -3.05120852e-02, + 4.14704792e-02, -5.14927916e-02, 3.18746492e-02, + -8.48620199e-03, -2.79160067e-02, 1.55973714e-02, + 3.37110385e-02, 6.33243546e-02, -5.04625663e-02, + 3.52013223e-02, -5.30491918e-02, 2.25692615e-02, + 1.95694971e-03, -4.37596589e-02, -1.65351946e-02, + -3.28059606e-02, -1.26289725e-02, -8.01686794e-02, + -4.44752611e-02, -1.15274042e-02, -1.74678266e-02, + -5.21782273e-03, 3.62658985e-02, -5.66074327e-02, + -1.00420807e-02, -4.82268445e-02, 5.96063863e-03, + -6.50647506e-02, 3.95454317e-02, 6.53651506e-02, + -1.19691398e-02, -3.21850181e-02, -3.63157801e-02, + -1.39682181e-02, 3.56259942e-02, 5.23050800e-02, + 6.30052239e-02, 4.96584177e-03, 3.19971107e-02, + 4.80453372e-02, 1.08391950e-02, -1.44911148e-02, + -1.36337653e-02, 2.39572469e-02, -1.68092083e-02, + 
1.59984883e-02, -4.50005308e-02, -3.41027230e-02,
+              -4.23994437e-02, -3.41491848e-02, -1.11522516e-02,
+              3.91584784e-02, -8.63064826e-03, 1.62917341e-03,
+              4.72876392e-02, 4.34379354e-02, -1.60112102e-02,
+              2.45747343e-02, -2.62808930e-02, -1.30942417e-02,
+              -2.46385578e-02, -5.69089167e-02, 4.34393138e-02,
+              2.81369295e-02, 9.39730462e-03, 3.66095454e-02,
+              -3.57634388e-02, -5.15757166e-02, 4.36473340e-02,
+              3.33106704e-02, -2.30557956e-02, -4.34662476e-02,
+              8.22917186e-03, 3.23445201e-02, 3.20423469e-02,
+              2.58185733e-02, 2.08669100e-02, -3.71028371e-02,
+              2.59646680e-02, 3.17775458e-02, -2.79521737e-02,
+              2.55317445e-05, 5.36171831e-02, -3.58999632e-02,
+              -2.48517226e-02, 1.96478218e-02, 1.94271689e-03,
+              -6.67033643e-02, 5.15624769e-02, 4.95204877e-04,
+              2.60936953e-02, -3.17780972e-02, -3.39414505e-03,
+              -4.86411117e-02, 4.45222147e-02, 3.85296158e-02,
+              4.56899665e-02, -4.49299552e-02, -4.69735861e-02,
+              4.79343496e-02, -5.89369908e-02, 2.69743279e-02,
+              6.08315691e-02, -3.96981724e-02, -2.92713158e-02,
+              5.85827008e-02, -6.83050777e-04, 5.50356284e-02,
+              -4.94767725e-02, -3.13899517e-02, 8.52199346e-02,
+              3.29716615e-02, -3.87356529e-04, 5.57458587e-03,
+              1.49246072e-02, 3.67202647e-02, -6.45631030e-02,
+              -8.62391070e-02, -1.58972517e-02, 3.65059227e-02,
+              -4.72036079e-02, 9.78113618e-03, 2.86919046e-02,
+              -3.40395235e-02, -2.46855095e-02, 2.03215890e-02,
+              -3.74348611e-02, 1.24361822e-02, -4.23930883e-02,
+              3.13656889e-02, -1.30750448e-03, 5.46741672e-02,
+              5.89324208e-03, 8.45710263e-02, -1.76011398e-02,
+              -3.36720943e-02, 3.13056707e-02, -1.87502224e-02,
+              -3.24332475e-04, 5.20042256e-02, 1.08312247e-02,
+              -1.47905340e-02, 3.15025076e-03, -3.16224396e-02,
+              -2.01831735e-03, 1.08164884e-02, 3.10172942e-02,
+              -2.63415687e-02, 2.20957715e-02, -1.59405936e-02,
+              4.79880441e-03, 5.59855364e-02, -4.64766026e-02,
+              1.07115433e-02, 7.22416118e-02, -2.35065930e-02,
+              2.94936337e-02, -4.69267145e-02, -3.17620188e-02,
+              3.14577185e-02, -3.31599042e-02, -3.91567498e-02,
+              -1.33554246e-02, -2.21432131e-02, 5.64046856e-03,
+              4.62996401e-02, 5.16851107e-03, 6.71406509e-03,
+              -9.82102230e-02, -1.54658882e-02, 5.52075319e-02,
+              6.54641762e-02, 1.92561112e-02, 2.99217459e-02,
+              1.90807525e-02, -8.32299981e-03, -2.37437077e-02,
+              -5.67882136e-03, -6.17451333e-02, -5.73352724e-02,
+              -7.64597356e-02, 7.11698905e-02, 6.08729795e-02,
+              4.20871787e-02, 7.80280381e-02, -1.38695640e-02,
+              2.03670412e-02, 2.32333187e-02, -6.84289709e-02,
+              5.50778769e-02, -1.06860213e-02, -2.63890754e-02,
+              -2.26620920e-02, -1.10853920e-02, 5.87578416e-02,
+              -1.37342932e-02, -3.72807235e-02, 1.94568485e-02,
+              -7.96313360e-02, 4.59807143e-02, -3.62987965e-02,
+              -2.85756756e-02, -3.30580287e-02, -4.58282745e-03,
+              6.33826479e-02, -5.16325980e-02, -2.22003944e-02,
+              3.94202694e-02, -8.25583562e-03, 5.13864309e-03,
+              3.82165238e-02, 4.69028950e-02, 4.53159325e-02,
+              2.17314996e-02, 4.04652283e-02, 3.39442194e-02,
+              3.58035490e-02, 1.55868707e-02, -6.86571822e-02,
+              4.35505472e-02, -5.98884411e-02, -1.31844394e-02,
+              9.69593599e-03, -5.54979593e-02, -3.48618627e-02,
+              4.45805937e-02, -1.66676361e-02, 9.73566715e-03,
+              -3.78498295e-03, -3.05469669e-02, -3.16619277e-02,
+              -4.89909016e-02, -3.14661451e-02, 4.82626166e-03,
+              1.41253583e-02, 1.96540579e-02, -2.15874482e-02,
+              3.00536547e-02, -3.25051770e-02, -4.23656926e-02,
+              5.14955036e-02, -3.49177630e-03, -1.62759423e-02,
+              -3.87035273e-02, 5.41068800e-02, -3.57351862e-02,
+              4.66594845e-02, -3.40629183e-02, -3.08876690e-02,
+              -3.69671360e-02, -2.27069724e-02, -2.11486574e-02,
+              1.03544667e-02, 4.63655125e-03, 1.83732305e-02,
+              6.68170825e-02, -4.01819386e-02, 2.52030566e-02,
+              -4.80497144e-02, 1.00672785e-02, -1.43906819e-02,
+              -6.41445965e-02, 1.51935192e-02, -1.53031796e-02,
+              4.90951259e-03, 2.71559451e-02, -8.51686392e-03,
+              4.60648024e-03, -4.97527942e-02, 2.24174364e-04,
+              2.68735271e-02, 1.35805942e-02, -2.47550961e-02,
+              3.62117141e-02, -1.67884422e-03, -2.91256625e-02,
+              -2.29127817e-02, 4.16702218e-02, 4.62259650e-02,
+              2.75031398e-05, -3.58557850e-02, 3.61657739e-02,
+              -1.47870332e-02, -6.74858829e-03, -3.82761993e-02,
+              -5.95595874e-02, -2.89052296e-02, -3.25021707e-02,
+              6.24917913e-03, 3.01534459e-02, 5.06045595e-02,
+              -4.44340967e-02, 4.34991671e-03, -4.94660325e-02,
+              -1.62318677e-01, -5.81121407e-02, -2.72427686e-02,
+              -3.14156264e-02, 3.18332091e-02, 6.30181795e-03,
+              2.35881936e-02, -1.83542576e-02, -3.45146214e-03,
+              -2.69651460e-03, 3.45528312e-02, -1.35122845e-02,
+              -8.52363035e-02, 1.05168181e-03, -1.97159662e-03,
+              -3.05432416e-02, -7.14082690e-03, 1.25974445e-02,
+              -7.96545893e-02, -2.02763285e-02, 2.05951408e-02,
+              7.20165521e-02, 7.01677129e-02, -4.30359505e-03,
+              1.86908189e-02, 5.76274358e-02, -5.46180867e-02,
+              3.71913686e-02, -2.98889522e-02, -2.24336684e-02,
+              -4.78838272e-02, 5.01907766e-02, -2.28472846e-03,
+              -2.29000486e-02, 1.54428110e-02, 4.81073968e-02,
+              -2.82442104e-03, 4.50737402e-02, -4.12207693e-02,
+              1.11912638e-02, -4.89227995e-02, 2.87061520e-02,
+              -1.51606118e-02, 6.65191328e-03, 1.81607958e-02,
+              -9.65218898e-03, 2.04727817e-02, -3.04027107e-02,
+              4.03397642e-02, -1.99636258e-02, -6.68246448e-02,
+              -1.86916068e-02, 1.98589023e-02, 2.29181312e-02,
+              -4.96531166e-02, 1.53880240e-03, -1.33979262e-03,
+              -4.27836254e-02, -5.04723825e-02, 3.58899459e-02,
+              3.21483538e-02, 3.07801440e-02, 7.06098154e-02,
+              -2.05557700e-02, -9.60775092e-03, -5.12775965e-02,
+              -4.27475795e-02, -1.66314114e-02, 1.22427968e-02,
+              3.94281605e-03, 1.62784848e-02, 2.04217043e-02,
+              2.78562270e-02, -4.04961966e-02, 3.53613868e-02,
+              -2.28397883e-02, 4.94474396e-02, 1.47242341e-02,
+              -2.89180707e-02, 6.27992824e-02, 2.52445675e-02,
+              -1.72485840e-02, -4.16123830e-02, 9.81981158e-02,
+              3.31860897e-03, -1.60940047e-02, -7.29231685e-02,
+              -1.17015755e-02, -9.40752402e-02, -2.52000429e-02,
+              4.16504731e-03, -6.58993004e-03, 5.73325679e-02,
+              2.39223801e-02, -5.71232615e-03, -6.72712401e-02,
+              -2.43610200e-02, 1.98861603e-02, -5.23395464e-02,
+              -1.17257414e-02, 6.62670881e-02, -3.79348435e-02,
+              1.73155870e-02, -3.04324180e-03, -2.20575500e-02,
+              3.43058147e-02, 1.55564845e-02, 4.58828881e-02,
+              -6.83468506e-02, -4.52903239e-03, 8.61544535e-03,
+              -1.74299181e-02, -8.65335297e-03, -2.12285891e-02,
+              -3.06075271e-02, -3.17619145e-02, 3.53866704e-02,
+              1.27371750e-03, 5.52219385e-03, 3.67731676e-02,
+              2.09426526e-02, -2.62479857e-02, -2.22966745e-02,
+              -9.86706000e-04, 7.63195753e-02, -1.13831637e-02,
+              3.12389657e-02, -1.79441329e-02, 4.22073901e-03,
+              -6.24284297e-02, 3.41456495e-02, 4.74380851e-02,
+              -2.53395233e-02, 2.22158246e-02, 1.08980725e-03,
+              -1.82700586e-02, 2.74093281e-02, -2.36395374e-02,
+              5.66534977e-03, 2.46165581e-02, 5.74524589e-02,
+              -4.45447350e-03, -1.56035209e-02, 2.65500657e-02,
+              4.80396971e-02, 1.63284913e-02, 2.53283582e-03,
+              -4.98772711e-02, -4.64920104e-02, -5.13953380e-02,
+              -5.77078480e-03, -3.59517522e-02, 1.74799003e-02,
+              3.76863219e-02, -4.01153788e-02, 2.98959203e-02,
+              -4.39188927e-02, 5.65573499e-02, -1.20074246e-02,
+              -1.69790108e-02, 1.95173267e-02, 3.85093093e-02,
+              -1.05365198e-02, -1.74229331e-02, -8.94283317e-03,
+              -2.11154986e-02, 3.77129540e-02, 6.34477809e-02,
+              3.42995971e-02, -2.16718446e-02, 1.37758628e-02,
+              -4.87477109e-02, 2.48271711e-02, 2.39046700e-02,
+              -6.03786996e-03, -2.49127485e-02, -5.87582542e-03,
+              -4.45803702e-02, -1.07264286e-02, -2.49500480e-02,
+              -3.53427939e-02, 4.35795030e-03, -1.51483789e-02,
+              -6.36755228e-02, 2.06587408e-02, -1.48379998e-02,
+              -3.42692211e-02, -1.13392146e-02, 5.62237501e-02,
+              -3.89885232e-02, 6.20278455e-02, 3.66568305e-02,
+              5.08336350e-02, -2.55245101e-02, -3.97595577e-02,
+              4.05675210e-02, 2.09310409e-02, 8.26423708e-03,
+              -2.16889735e-02, -5.45453979e-03, 2.87272036e-02,
+              5.04308604e-02, 5.31062707e-02, -2.90425196e-02,
+              -1.76872686e-02, 2.49194205e-02, -1.84365325e-02,
+              -5.73580968e-04, 4.22914885e-02, -1.19949831e-02,
+              3.33814882e-02, 4.23363112e-02, -4.29901294e-02,
+              1.50336856e-02, 1.74857455e-03, -4.49241288e-02,
+              4.04228866e-02, 5.29668331e-02, 6.81887865e-02,
+              3.92002100e-03, -3.30047831e-02, 2.68607531e-02,
+              6.87942058e-02, 3.75969671e-02, 2.56715063e-02,
+              -5.02616949e-02, 4.36541019e-03, -7.20635355e-02,
+              7.42193758e-02, 4.05292623e-02, 7.43994582e-03,
+              -4.85715568e-02, -3.33363598e-04, -9.24233405e-04,
+              -1.49145974e-02, 1.86247304e-02, -1.23054506e-02,
+              -2.57431697e-02, -2.05192547e-02, 4.31957170e-02,
+              2.54208185e-02, -3.06499861e-02, -3.40308212e-02,
+              8.68191011e-03, -7.16370856e-03, -6.01101201e-03,
+              7.03491867e-02, 3.72186638e-02, -2.86593996e-02,
+              4.56372797e-02, 6.12319931e-02, 1.89859923e-02,
+              6.28441647e-02, -1.55821620e-02, 4.43504341e-02,
+              -2.79502245e-03, 6.15532771e-02, 1.03703458e-02,
+              6.30988181e-02, 4.24089432e-02, 5.60553819e-02,
+              -1.39394104e-02, -3.19706090e-02, 3.71167026e-02,
+              -2.00144369e-02, -9.74838622e-03, 4.47796062e-02,
+              -3.47584486e-02, 7.13985115e-02, 2.47936659e-02,
+              4.66571152e-02, 2.12951954e-02, 1.95531342e-02,
+              7.93775823e-03, -5.79514764e-02, 1.79772731e-02,
+              2.58140964e-03, 1.30382339e-02, -3.12318821e-02,
+              -3.14340517e-02, -1.86547413e-02, 2.50276290e-02,
+              3.48293111e-02, -5.20406663e-03, -2.89757121e-02,
+              -2.90802168e-03, -1.11093326e-02, -2.97714528e-02,
+              -3.21674049e-02, 5.21572642e-02, 1.49144502e-02,
+              -2.22427752e-02, -3.45289409e-02, -8.56250431e-03,
+              -6.27698228e-02, 6.80940272e-03, 1.15727470e-03,
+              1.00673260e-02, 7.61638070e-03, -3.06326188e-02,
+              4.00051959e-02, 3.10907662e-02, 3.89613993e-02,
+              3.36998031e-02, -3.28814164e-02, -4.71040839e-03,
+              1.73051730e-02, 8.86960514e-03, -5.23704998e-02,
+              -3.12850885e-02, -3.08352690e-02, 3.86217725e-03,
+              4.03188448e-03, -6.13248069e-03, 9.04717483e-03,
+              -1.77066773e-02, -2.05547065e-02, -4.37792437e-03,
+              2.09461357e-02, -1.96832046e-03, -2.68010679e-03,
+              -5.08974157e-02, -8.00034683e-03, 4.26372625e-02,
+              -1.54677946e-02, 2.12462787e-02, 8.59516207e-03,
+              1.05093252e-02, 1.74335949e-02, -5.26627414e-02,
+              3.36601846e-02, -8.35399143e-03, 4.38855886e-02,
+              -2.45736260e-02, 3.73706035e-02, -6.81321993e-02,
+              1.68453902e-02, 4.50808182e-02, -4.56484444e-02,
+              -3.48260552e-02, 2.13449933e-02, 4.59566014e-03,
+              3.47219482e-02, 4.49425466e-02, 1.09793097e-02,
+              2.24647503e-02, -4.97041978e-02, -3.07102297e-02,
+              -4.94886376e-02, 6.06917031e-03, 2.89279800e-02,
+              8.69093370e-03, 5.67235015e-02, 2.99701951e-02,
+              1.48614319e-02, -1.45340730e-02, 1.34937400e-02,
+              -6.98988214e-02, 2.65878160e-02, -1.64858997e-02,
+              2.86024082e-02, 2.82326005e-02, -1.00837750e-02,
+              1.11203119e-02, 2.20332444e-02, -2.04786845e-02,
+              -5.71124479e-02, 6.35670125e-02, 5.17619029e-02,
+              4.89608981e-02, 1.45385480e-02, 1.65739506e-02,
+              -2.54417695e-02, -3.58065143e-02, -5.06120026e-02,
+              -5.46549708e-02, 3.59449573e-02, 5.59705077e-04,
+              -2.30380464e-02, 1.52595676e-02, -6.69343099e-02,
+              4.55643842e-03, 2.28525754e-02, -3.72355767e-02,
+              -2.70220302e-02, -1.16989273e-03, 5.52339405e-02]]
+        )
+    )
+
+    NLI_BERT_BASE_CLS_MODEL_EMBEDDINGS = np.squeeze(
+        np.array(
+            [[-1.38045363e-02, -1.26324790e-02, 2.80067362e-02,
+              -3.94332707e-02, 4.21525203e-02, 3.48563381e-02,
+              4.26499359e-02, 1.80672705e-02, -2.30675992e-02,
+              -2.59304401e-02, -5.94409034e-02, 3.59071419e-02,
+              -5.27208410e-02, -2.69699586e-03, 6.43493012e-02,
+              -4.28358791e-03, 1.59534626e-02, 1.25811659e-02,
+              5.53512163e-02, -4.69161868e-02, -8.22236855e-03,
+              6.39215484e-02, 7.60666840e-03, -5.44207245e-02,
+              1.50170950e-02, -5.27593717e-02, 3.40173021e-03,
+              -2.46120291e-03, -1.74842868e-02, 3.20513360e-02,
+              5.35435043e-03, -2.40322035e-02, -1.82513501e-02,
+              -8.44017789e-03, -2.77171433e-02, -5.15357554e-02,
+              3.15882415e-02, -1.95944067e-02, -1.86955947e-02,
+              3.43169289e-04, -1.48254391e-02, -2.55950205e-02,
+              5.79135157e-02, -1.33917453e-02, -7.02805445e-02,
+              1.34170298e-02, -1.35798544e-01, 2.25282833e-02,
+              -2.62668189e-02, 3.36957946e-02, -8.78745243e-02,
+              8.17008503e-03, 6.72491863e-02, 2.84580011e-02,
+              -5.39750746e-03, 1.38973808e-02, 2.37578265e-02,
+              -4.52367291e-02, 5.53393271e-03, -4.76923352e-03,
+              6.64933259e-03, 8.94959923e-03, 4.96707968e-02,
+              2.28353385e-02, -1.58383921e-02, 6.58403337e-03,
+              2.70136036e-02, -5.15306182e-02, -6.82405382e-02,
+              5.82373664e-02, 1.42340157e-02, 1.39634556e-03,
+              -3.55863161e-02, 3.25084701e-02, -4.85322587e-02,
+              -1.56914983e-02, 1.68089801e-03, 3.16784494e-02,
+              4.48221480e-03, 4.57881540e-02, -2.11009867e-02,
+              4.71254475e-02, 4.13249061e-02, -1.61566655e-03,
+              -2.38496773e-02, 1.46785351e-02, 1.81302503e-02,
+              8.77589278e-04, -7.54943639e-02, 2.95870844e-02,
+              -2.37905979e-02, -2.77383458e-02, -2.33660303e-02,
+              -4.31852229e-02, 6.75797686e-02, 1.19550042e-02,
+              -3.73258702e-02, 2.40408592e-02, -1.30674234e-02,
+              -1.03089539e-02, -2.56236456e-02, -6.41170144e-02,
+              2.31279228e-02, -2.96398420e-02, -4.35341410e-02,
+              -1.18608596e-02, -2.42918935e-02, -3.52341197e-02,
+              -5.19590154e-02, 4.08839658e-02, 2.44715507e-03,
+              -1.98010430e-02, 5.92447147e-02, 3.20643606e-03,
+              -5.56434095e-02, 3.74478698e-02, -7.77755231e-02,
+              3.77231874e-02, -1.41348215e-02, 6.48891106e-02,
+              -5.50682582e-02, 2.84178909e-02, 3.42046027e-03,
+              1.45685999e-02, 2.42748708e-02, -3.39308381e-02,
+              4.16903831e-02, 4.44773957e-02, -6.30539060e-02,
+              -1.38155529e-02, 3.27304937e-03, 2.75869332e-02,
+              5.27329668e-02, 4.17043827e-02, -2.46828366e-02,
+              -3.69426375e-03, 1.51281999e-02, -3.29721496e-02,
+              -7.93246180e-03, 1.81145314e-02, -5.84085770e-02,
+              2.84974873e-02, 4.05289466e-03, -3.91773991e-02,
+              1.53927822e-02, -4.75471616e-02, -5.15790954e-02,
+              9.12845228e-03, -4.31800857e-02, 1.62325744e-02,
+              -1.71076115e-02, 4.15174216e-02, -2.62871403e-02,
+              -3.06025948e-02, -2.64176708e-02, 6.29329914e-03,
+              1.87084787e-02, 7.40983188e-02, -2.53543872e-02,
+              -8.93931463e-03, 1.16059789e-02, 5.62588349e-02,
+              5.19875400e-02, 3.65024339e-03, 3.44109870e-02,
+              1.73520651e-02, -1.15880580e-03, -4.31972072e-02,
+              9.08681192e-03, 4.49779816e-02, 6.06826730e-02,
+              -5.23631461e-02, -5.85136120e-04, -1.44744078e-02,
+              -5.34051880e-02, -1.59004834e-02, 1.60060935e-02,
+              3.74302492e-02, 9.25461203e-02, -4.45132963e-02,
+              -2.15530302e-02, 2.05617808e-02, 3.41037177e-02,
+              -9.27085131e-02, -8.03314429e-03, -3.73223796e-03,
+              2.52851117e-02, -1.02553274e-02, 5.83608486e-02,
+              -1.72788016e-02, 3.81971076e-02, -3.41951326e-02,
+              3.93162407e-02, -9.14255306e-02, 3.26433294e-02,
+              1.72617696e-02, -4.48362045e-02, 4.00994495e-02,
+              1.64799090e-03, 1.43023990e-02, 8.80771354e-02,
+              -5.11318631e-02, 4.16282192e-02, -6.33208547e-03,
+              3.51006500e-02, 7.66049996e-02, -5.60878254e-02,
+              5.18849306e-02, -2.00003828e-03, -2.78270487e-02,
+              -5.05064949e-02, 8.39368328e-02, 4.19133939e-02,
+              9.04264767e-03, 2.95757875e-02, -2.20852010e-02,
+              2.31264196e-02, -3.02072503e-02, 1.18131898e-02,
+              -1.17451865e-02, 2.35333778e-02, 8.67451169e-03,
+              3.97997834e-02, 2.68101301e-02, -5.67641407e-02,
+              -7.33581744e-03, 5.17354868e-02, -1.05423471e-02,
+              1.09437697e-01, 1.03249047e-02, 1.95929557e-02,
+              4.91198944e-03, 1.84405819e-02, 3.48240584e-02,
+              1.12121813e-02, 1.16120065e-02, -8.48978758e-03,
+              1.58214830e-02, 1.13368491e-02, 2.51315478e-02,
+              4.78463061e-02, 9.14337561e-02, -1.31221702e-02,
+              -2.13437993e-02, -6.32071048e-02, -2.44918615e-02,
+              7.14293048e-02, 2.39957846e-03, 4.01073339e-04,
+              2.99567711e-02, 1.17132324e-04, -4.24298830e-02,
+              -1.35866962e-02, -4.60037589e-02, -6.45992346e-03,
+              3.59253511e-02, -3.89481150e-02, -1.45485904e-03,
+              4.22752798e-02, 1.39497104e-03, -2.88896449e-02,
+              -1.02068596e-02, -6.60405606e-02, 1.40659837e-02,
+              -6.81355875e-03, -2.20663417e-02, 1.09322118e-02,
+              1.63183771e-02, 3.42491046e-02, 3.50330621e-02,
+              1.77631807e-02, -6.69359975e-03, -7.93624949e-03,
+              2.17554905e-02, 4.11989093e-02, -1.60218272e-02,
+              6.14417121e-02, 2.74969377e-02, -2.89977118e-02,
+              -5.77485305e-04, 1.25290286e-02, -3.36838304e-03,
+              -7.16019943e-02, 3.74039449e-02, 1.82345044e-02,
+              -4.12348360e-02, 3.42253633e-02, -5.36159202e-02,
+              -3.60822678e-02, -1.83748454e-02, -3.80816907e-02,
+              7.28133926e-03, -7.83245042e-02, -7.92451054e-02,
+              7.21210614e-02, 1.89389009e-02, 3.71034518e-02,
+              4.73285876e-02, -5.96028231e-02, -1.06078237e-02,
+              -2.47211661e-02, -1.75019284e-03, -1.16187353e-02,
+              2.29319017e-02, 4.57336791e-02, 9.28143691e-03,
+              9.33773350e-03, 3.22774909e-02, -8.69705081e-02,
+              -1.64810903e-02, -2.78827380e-02, -3.14949416e-02,
+              -1.49380853e-02, -2.31218785e-02, -3.43466438e-02,
+              1.12710791e-02, 1.71540920e-02, -5.38307382e-03,
+              1.19593577e-03, -4.42360640e-02, -2.55687125e-02,
+              4.13006879e-02, 2.10024659e-02, 3.30868475e-02,
+              -2.33111717e-02, 6.43895566e-03, -2.85872221e-02,
+              5.20928539e-02, -1.73622195e-03, 1.12925805e-02,
+              2.46119336e-03, 1.94990113e-02, -6.53448794e-03,
+              2.87583452e-02, -8.52482691e-02, 5.48473820e-02,
+              -2.25019753e-02, -7.50565007e-02, 3.58133800e-02,
+              2.62108719e-04, 6.71855360e-03, -2.99325325e-02,
+              -2.82514151e-02, 2.40493305e-02, 4.99187186e-02,
+              -2.16700621e-02, 3.83451171e-02, -8.42713788e-02,
+              6.42555347e-03, 1.09448060e-02, -2.97963563e-02,
+              -1.25807803e-02, -4.24578786e-02, 9.06305108e-03,
+              -6.18216768e-02, -4.73963730e-02, 3.89293134e-02,
+              4.51285653e-02, -5.73532358e-02, 4.82225530e-02,
+              4.48965542e-02, 2.68238187e-02, 2.44180057e-02,
+              2.54151188e-02, -5.76627534e-03, -2.44648289e-02,
+              3.50612924e-02, -1.35726109e-02, 7.10050389e-02,
+              -3.43175754e-02, -7.67040951e-03, -9.17334259e-02,
+              3.55509818e-02, -1.52201662e-02, -1.86821073e-02,
+              6.14645816e-02, -9.04140808e-03, -2.32551973e-02,
+              8.32157396e-03, -9.05250479e-03, 9.07492731e-03,
+              -5.62730320e-02, 7.72596756e-03, -9.68296602e-02,
+              4.79538739e-02, -1.61648309e-03, 3.63967605e-02,
+              -2.85648508e-03, 7.88790360e-03, -1.16378386e-02,
+              9.55225341e-03, -3.98814678e-03, 5.93707040e-02,
+              1.57538857e-02, 9.87393782e-03, -1.01059899e-01,
+              7.63127767e-03, 2.60274503e-02, 2.02106722e-02,
+              -5.46223996e-03, -2.90768314e-02, 4.96712625e-02,
+              6.30857348e-02, 4.50786349e-04, 2.71802079e-02,
+              -1.52121587e-02, 7.93779865e-02, -1.12200750e-03,
+              -6.59367889e-02, -4.90725115e-02, -6.85358234e-03,
+              5.16800657e-02, -9.86841973e-03, 1.17782773e-02,
+              -9.60045829e-02, 2.71879900e-02, 1.59297488e-03,
+              -1.76038351e-02, 4.71576564e-02, 3.59412096e-02,
+              -5.30479439e-02, 2.09758859e-02, -3.80521454e-02,
+              2.85601802e-02, -1.90537721e-02, 4.22683880e-02,
+              2.00102124e-02, -2.83400808e-02, 1.52540277e-03,
+              -6.69959113e-02, -2.86734030e-02, -6.68420037e-03,
+              -7.50311911e-02, 4.72005643e-03, -3.45429704e-02,
+              -4.62886121e-04, -5.26736714e-02, 5.49543388e-02,
+              -3.87938470e-02, -7.75126927e-03, 2.54016947e-02,
+              -4.34015505e-03, 3.05605680e-02, -5.07270917e-03,
+              3.62393283e-03, 5.47420233e-02, -1.26313273e-04,
+              2.47754040e-03, 1.30665274e-02, -2.95967162e-02,
+              -4.71629994e-03, 2.08164137e-02, -7.79596204e-03,
+              6.07454916e-03, 1.66969784e-02, 3.58711518e-02,
+              1.49399843e-02, -1.83998812e-02, 5.47269993e-02,
+              -3.74314897e-02, -2.31843740e-02, -1.28818871e-02,
+              -7.15285540e-02, -4.70744520e-02, 1.83860287e-02,
+              3.22536007e-02, -4.07805108e-02, 2.26784591e-02,
+              4.22362909e-02, 1.86444231e-04, -4.87948582e-02,
+              -4.35321741e-02, -7.99485669e-03, 3.50832520e-03,
+              2.52029422e-04, 3.98564711e-02, -4.47862446e-02,
+              3.28087918e-02, 2.37508211e-02, -2.60065980e-02,
+              3.71748954e-02, 5.29616252e-02, 1.13189528e-02,
+              -2.24010926e-02, -1.28271885e-03, 4.29592468e-02,
+              2.68180631e-02, 1.73901822e-02, 5.43172210e-02,
+              1.35355825e-02, -3.91236655e-02, 1.53188119e-02,
+              -5.97416684e-02, -2.63233613e-02, -1.61851719e-02,
+              8.79627652e-03, -1.67571083e-02, -6.47287145e-02,
+              3.68832089e-02, -1.36806294e-02, 3.32759842e-02,
+              1.57959200e-02, -1.96780954e-02, -6.02490967e-03,
+              -1.58948582e-02, -1.34384539e-02, -1.77003406e-02,
+              2.20466331e-02, -1.25331581e-02, 1.22764856e-02,
+              -5.05211297e-03, 4.79424372e-02, -6.27269372e-02,
+              -3.18896100e-02, -1.23048173e-02, 1.13159232e-03,
+              -9.36958287e-03, 2.30313707e-02, -2.39874143e-03,
+              -5.38537130e-02, -1.56427529e-02, -3.87173742e-02,
+              -4.58558928e-03, -7.15564489e-02, -5.35098696e-03,
+              4.37243432e-02, 4.37841713e-02, -3.55568752e-02,
+              2.52225902e-02, -4.74736206e-02, -2.48769677e-04,
+              -1.69400889e-02, -8.63008946e-03, 4.11478356e-02,
+              8.58931802e-03, 1.24860017e-04, -4.18979768e-03,
+              -7.32144248e-03, 3.08874585e-02, 3.23139280e-02,
+              2.69013904e-02, 4.51113656e-02, -5.64347254e-03,
+              -7.31476247e-02, -7.23212361e-02, -3.13672535e-02,
+              2.94222031e-02, -2.95080729e-02, 5.31540178e-02,
+              -2.26552058e-02, -1.46680530e-02, -2.33522933e-02,
+              -2.61158757e-02, -1.59751624e-02, -6.12260355e-03,
+              -2.99068280e-02, -1.01253819e-02, 5.65799773e-02,
+              1.55801494e-02, -9.76853538e-03, 3.20441537e-02,
+              -7.39495782e-03, 1.56227667e-02, 3.77561636e-02,
+              -5.90708368e-02, -9.09191743e-03, -4.58559878e-02,
+              5.29993996e-02, 5.37395626e-02, -1.22587532e-02,
+              -5.24723828e-02, -1.13596311e-02, 2.74823140e-02,
+              2.36147791e-02, -3.92370187e-02, 1.01046823e-02,
+              1.79001819e-02, -3.70946452e-02, -1.71488430e-02,
+              -3.12456973e-02, -1.79726183e-02, 5.20715900e-02,
+              7.52280578e-02, -3.66633423e-02, -3.44410539e-02,
+              4.63768467e-02, 5.01927286e-02, -5.97664230e-02,
+              -2.49792654e-02, 3.71258669e-02, 9.88026056e-03,
+              -3.64374951e-03, 8.04967713e-03, -1.27905598e-02,
+              1.59236789e-03, 1.32136988e-02, -3.92748155e-02,
+              6.98421150e-03, -4.26606135e-03, 4.41442318e-02,
+              -4.39220741e-02, 2.47516972e-03, -6.06678240e-02,
+              8.90192203e-03, -6.09061122e-02, 6.05002511e-03,
+              -1.40853375e-02, -3.11983787e-02, -1.93421282e-02,
+              3.80955525e-02, -1.47683313e-02, -4.81777489e-02,
+              -3.89053896e-02, -2.47424236e-03, 5.57187339e-03,
+              -7.82938749e-02, -4.64405343e-02, 4.55175266e-02,
+              4.28984165e-02, 1.36786168e-02, 2.50886828e-02,
+              -1.98131334e-03, 1.15832044e-02, 3.32416631e-02,
+              -3.00699174e-02, -3.87401953e-02, 1.66212078e-02,
+              -1.01425033e-03, 1.76178887e-02, -2.30543781e-02,
+              5.10412082e-02, 1.68139138e-03, 3.28061767e-02,
+              -9.70197562e-03, -1.47164660e-02, 5.07588312e-02,
+              -4.45683300e-02, 1.41167911e-02, 7.48501122e-02,
+              -2.28669471e-03, -2.29809768e-02, 3.25850658e-02,
+              1.22649437e-02, -4.49586660e-02, -1.23455962e-02,
+              1.34388264e-02, 2.80738361e-02, 1.73163153e-02,
+              4.79589291e-02, 1.73718277e-02, -3.24406140e-02,
+              -7.20741078e-02, 2.41176803e-02, -2.68644188e-02,
+              3.63147468e-03, -1.69604328e-02, -6.10141549e-03,
+              1.15496721e-02, 2.72606835e-02, -1.42320408e-03,
+              2.10313872e-02, -3.11159412e-03, 1.75888129e-02,
+              4.69241925e-02, 2.79321335e-02, -9.49071255e-03,
+              -8.62548873e-03, 1.31939827e-02, -1.35884443e-02,
+              -4.31307235e-05, 7.52040651e-03, -3.39714810e-02,
+              4.38810699e-03, -2.90142670e-02, 1.11061800e-02,
+              -3.39282826e-02, 8.71872082e-02, -1.33918030e-02,
+              2.94200815e-02, -1.47213526e-02, 2.68452987e-02,
+              3.78494971e-02, -1.61408763e-02, -8.55053682e-03,
+              -2.46409029e-02, 2.35677417e-02, 2.94158142e-02,
+              -6.38753846e-02, -1.36387218e-02, 3.08700856e-02,
+              1.50884604e-02, 1.09616742e-02, -2.23893747e-02,
+              -8.72588158e-02, 1.42631563e-03, 6.42264485e-02,
+              -2.67157853e-02, -4.21791570e-03, -1.03313893e-01,
+              -1.01346679e-01, -1.94876958e-02, -3.72002758e-02,
+              -4.64463606e-02, 1.91609282e-02, 2.57606376e-02,
+              -2.49569211e-02, 5.09973168e-02, -4.25925851e-03,
+              8.56407825e-03, -2.00823974e-02, -1.97362644e-03,
+              -1.40387211e-02, 2.16900483e-02, 3.32145765e-02,
+              -3.89750488e-02, 5.49208513e-03, 1.54631743e-02,
+              1.39654987e-02, 5.94044663e-02, -1.74135081e-02,
+              -6.24101236e-03, 3.39858271e-02, -1.59896035e-02,
+              6.56539872e-02, -1.55073768e-02, 1.26004219e-02,
+              5.30480593e-02, 1.66684166e-02, -2.65195146e-02,
+              -2.83525735e-02, -2.48214863e-02, 2.43366603e-02,
+              -8.61613154e-02, 2.53458507e-02, 1.49776191e-02,
+              4.16353866e-02, -1.40828900e-02, 2.77333986e-02,
+              1.26076955e-02, 3.32824588e-02, -3.75509560e-02,
+              6.09228760e-02, -1.58846229e-02, 1.83025636e-02,
+              -6.34610355e-02, -2.62076668e-02, 1.18369097e-02,
+              -2.27384828e-02, -4.23055142e-02, 9.78730898e-03,
+              6.02663821e-03, -9.79444850e-03, 5.02627203e-03,
+              -1.75426230e-02, -3.84047665e-02, -2.48221774e-02,
+              -3.05508953e-02, -5.34480345e-03, 5.55603206e-03,
+              -3.87982316e-02, -1.79277454e-02, 1.59406569e-02,
+              -5.81073314e-02, -2.23582499e-02, -1.50005086e-04,
+              -1.80699993e-02, -6.19800314e-02, 1.43732652e-02]]
+        )
+    )
+
+    def test_initialize_huggingface_model(self):
+        model_properties = {
+            "name": "test-model",
+            "type": "hf",
+            "poolingMethod": "mean"
+        }
+        model = HuggingFaceModel(model_properties, "cpu", {})
+        self.assertIsInstance(model, HuggingFaceModel)
+        self.assertEqual("cpu", model.device)
+        self.assertEqual({}, model.model_auth)
+        self.assertEqual("test-model", model.model_properties.name)
+        self.assertEqual("hf", model.model_properties.type)
+        self.assertEqual(PoolingMethod.Mean, model.model_properties.pooling_method)
+
+    def test_initialize_huggingface_model_with_invalid_properties(self):
+        test_cases = [
+            (
+                {
+                    "name": "test-model",
+                    "type": "hf",
+                    "pooling_method": "invalid_pooling_method"
+                },
+                "Invalid pooling method",
+            ),
+            (
+                {},
+                "Invalid model properties for the 'hf' model. Missing required fields."
+            ),
+            (
+                {
+                    "modelLocation": {
+                        "hf": {
+                            # missing "repoId"
+                            "filename": "test-filename"
+                        }
+                    },
+                    "type": "hf"
+                },
+                "Invalid modelLocation format"
+            ),
+            (
+                {
+                    "type": "hf",
+                    "pooling_method": "cls"
+                },
+                "Name is not provided"
+            ),
+            (
+                {
+                    "name": "test-model",
+                    "pooling_method": "cls"
+                },
+                "Type is not provided"
+            ),
+            (
+                {
+                    "url": "http://example.com",
+                    "modelLocation": {
+                        "hf": {
+                            "repoId": "test-repo-id",
+                            "filename": "test-filename"
+                        }
+                    },
+                    "type": "hf"
+                },
+                "url and modelLocation are provided at the same time"
+            ),
+        ]
+        for test_case, msg in test_cases:
+            with self.subTest(test_case=test_case):
+                with self.assertRaises(InvalidModelPropertiesError) as excinfo:
+                    _ = HuggingFaceModel(test_case, "cpu", {})
+                self.assertIn("Invalid model properties for the 'hf' model.", str(excinfo.exception))
+
+    def test_hf_e5_base_v2_embeddings_load_from_hf(self):
+        """A test to ensure the embeddings are generated correctly for the default text model, loading from
+        hf."""
+        model_properties = {
+            "name": 'intfloat/e5-base-v2',
+            "dimensions": 768,
+            "tokens": 512,
+            "type": "hf",
+            "model_size": 0.438,
+            "text_query_prefix": "query: ",
+            "text_chunk_prefix": "passage: ",
+            "notes": ""
+        }
+
+        model = HuggingFaceModel(model_properties, "cpu", {})
+        model.load()
+        embeddings = model.encode(['query: how much protein should a female eat'])
+        difference = np.linalg.norm(embeddings - self.E5_BASE_V2_MODEL_EMBEDDINGS) / len(
+            embeddings)
+        self.assertLess(difference, 1e-4, f"Text embeddings for model {model_properties} "
+                                          f"are not close enough. The average difference is: {difference}")
+
+    def test_hf_e5_base_v2_embeddings_load_from_zip_file(self):
+        """A test to ensure the embeddings are generated correctly for the default text model, loading from
+        a zip file on s3."""
+        model_properties = {
+            "url": "https://marqo-ecs-50-audio-test-dataset.s3.amazonaws.com/e5-li.zip",
+            "dimensions": 768,
+            "type": "hf",
+        }
+        model = HuggingFaceModel(model_properties, "cpu", {})
+        model.load()
+        embeddings = model.encode(['query: how much protein should a female eat'])
+        difference = np.linalg.norm(embeddings - self.E5_BASE_V2_MODEL_EMBEDDINGS) / len(
+            embeddings)
+        self.assertLess(difference, 1e-4, f"Text embeddings for model {model_properties} "
+                                          f"are not close enough. The average difference is: {difference}")
+
+    @mock.patch("transformers.AutoModel.from_pretrained", return_value=mock.MagicMock())
+    @mock.patch("transformers.AutoTokenizer.from_pretrained", side_effect=OSError("Tokenizer load failed"))
+    def test_tokenizer_loading_failure(self, mock_auto_tokenizer, mock_auto_model):
+        """Test that an error is raised when the tokenizer fails to load."""
+        model_properties = {
+            "name": "test-model",
+            "type": "hf",
+            "poolingMethod": "mean"
+        }
+
+        with self.assertRaises(InvalidModelPropertiesError) as excinfo:
+            model = HuggingFaceModel(model_properties, "cpu", {})
+            model.load()
+
+        self.assertIn("Tokenizer load failed", str(excinfo.exception))
+
+    @mock.patch("transformers.AutoModel.from_pretrained", side_effect=OSError("Model load failed"))
+    @mock.patch("transformers.AutoTokenizer.from_pretrained", return_value=mock.MagicMock())
+    def test_model_loading_failure(self, mock_auto_tokenizer, mock_auto_model):
+        """Test that an error is raised when the model fails to load."""
+        model_properties = {
+            "name": "test-model",
+            "type": "hf",
+            "poolingMethod": "mean"
+        }
+
+        with self.assertRaises(InvalidModelPropertiesError) as excinfo:
+            model = HuggingFaceModel(model_properties, "cpu", {})
+            model.load()
+
+        self.assertIn("Model load failed", str(excinfo.exception))
+
+    def test_sentence_transformers_nli_bert_base_cls_pooling_embeddings(self):
+        """A test to ensure the embeddings are generated correctly for a model loaded from hf.
+
+        This model uses the CLS pooling method.
+        """
+        model_properties = {
+            "name": "sentence-transformers/nli-bert-base-cls-pooling",
+            "dimensions": 768,
+            "type": "hf",
+        }
+
+        model = HuggingFaceModel(model_properties, "cpu", {})
+        model.load()
+        embeddings = model.encode(['This is an example sentence'])
+        difference = np.linalg.norm(embeddings - self.NLI_BERT_BASE_CLS_MODEL_EMBEDDINGS) / len(
+            embeddings)
+        self.assertLess(difference, 1e-4, f"Text embeddings for model {model_properties} "
+                                          f"are not close enough. The average difference is: {difference}")
+
+        difference_for_different_models = np.linalg.norm(embeddings - self.E5_BASE_V2_MODEL_EMBEDDINGS) / len(
+            embeddings)
+
+        self.assertGreater(
+            difference_for_different_models, 1,
+            f"Text embeddings for two different models are too close. "
+            f"There is a problem with the test data or a bug in the code."
+        )
diff --git a/tests/core/inference/test_hugging_face_model_properties.py b/tests/core/inference/test_hugging_face_model_properties.py
index 0874986c5..f18251f6e 100644
--- a/tests/core/inference/test_hugging_face_model_properties.py
+++ b/tests/core/inference/test_hugging_face_model_properties.py
@@ -1,9 +1,11 @@
 import unittest
-from marqo.core.inference.models.hugging_face_model_properties import HuggingFaceModelProperties, PoolingMethod
+from unittest import mock
+
 from pydantic import ValidationError
-from marqo.tensor_search.models.private_models import ModelLocation, ModelAuth
+
+from marqo.core.inference.models.hugging_face_model_properties import HuggingFaceModelProperties, PoolingMethod
 from marqo.tensor_search.models.external_apis.hf import HfModelLocation
-from unittest import mock
+from marqo.tensor_search.models.private_models import ModelLocation
 
 
 class TestHuggingFaceModelProperties(unittest.TestCase):
@@ -52,8 +54,11 @@ def test_infer_pooling_method(self):
         self.assertEqual(pooling_method, model.pooling_method)
 
     def test_explicit_valid_pooling_method(self):
-        model = HuggingFaceModelProperties(name="test-model", type="hf", pooling_method=PoolingMethod.CLS)
+        with mock.patch("marqo.core.inference.models.hugging_face_model_properties."
+                        "HuggingFaceModelProperties._infer_pooling_method_from_name") as mock_infer:
+            model = HuggingFaceModelProperties(name="test-model", type="hf", pooling_method=PoolingMethod.CLS)
         self.assertEqual(model.pooling_method, PoolingMethod.CLS)
+        mock_infer.assert_not_called()
 
     def test_explicit_invalid_pooling_method(self):
         with self.assertRaises(ValidationError) as excinfo:
From 6fe1f4036b55a621aff14bd0cbeba338a3889bdc Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Fri, 20 Sep 2024 14:47:04 +1000
Subject: [PATCH 27/63] Fixing tests

---
 .../model_downloading/test_corrupt_file_error_handling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/s2_inference/model_downloading/test_corrupt_file_error_handling.py b/tests/s2_inference/model_downloading/test_corrupt_file_error_handling.py
index eb7fe776c..69b3aef7d 100644
--- a/tests/s2_inference/model_downloading/test_corrupt_file_error_handling.py
+++ b/tests/s2_inference/model_downloading/test_corrupt_file_error_handling.py
@@ -6,7 +6,7 @@
 
 from marqo.s2_inference.configs import ModelCache
 from marqo.s2_inference.errors import InvalidModelPropertiesError
-from marqo.s2_inference.hf_utils import extract_huggingface_archive
+from marqo.core.inference.models.hugging_face_model import extract_huggingface_archive
 from marqo.s2_inference.s2_inference import _load_model
 

From c9e4cdf61e632f923b4a587ab062e5dbb8c3d2ad Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Fri, 20 Sep 2024 15:09:09 +1000
Subject: [PATCH 28/63] Fixing tests

---
 src/marqo/s2_inference/model_registry.py      |  1 -
 .../test_corrupt_file_error_handling.py       |  8 ++++----
 tests/tensor_search/test_model_auth.py        | 20 +++++++++----------
 3 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/src/marqo/s2_inference/model_registry.py b/src/marqo/s2_inference/model_registry.py
index d2169a1b4..a635b9ca5 100644
--- a/src/marqo/s2_inference/model_registry.py
+++ b/src/marqo/s2_inference/model_registry.py
@@ -1,6 +1,5 @@
 from marqo.s2_inference.clip_utils import CLIP, OPEN_CLIP, MULTILINGUAL_CLIP, FP16_CLIP, \
     get_multilingual_clip_properties
-# from marqo.s2_inference.hf_utils import HF_MODEL
 from marqo.core.inference.models.hugging_face_model import HuggingFaceModel
 from marqo.s2_inference.onnx_clip_utils import CLIP_ONNX
 from marqo.s2_inference.random_utils import Random
diff --git a/tests/s2_inference/model_downloading/test_corrupt_file_error_handling.py b/tests/s2_inference/model_downloading/test_corrupt_file_error_handling.py
index 69b3aef7d..8e022badc 100644
--- a/tests/s2_inference/model_downloading/test_corrupt_file_error_handling.py
+++ b/tests/s2_inference/model_downloading/test_corrupt_file_error_handling.py
@@ -204,7 +204,7 @@ def test_regular_file(self):
         with patch('os.path.isfile', return_value=True), \
                 patch('os.path.splitext', return_value=('/path/to/file', '.txt')), \
                 patch('os.makedirs'), \
-                patch("marqo.s2_inference.hf_utils.download_model", return_value = "/path/to/file.txt"):
+                patch("marqo.core.inference.models.hugging_face_model.download_model", return_value = "/path/to/file.txt"):
             for model_properties in self.dummy_model_properties:
                 with self.assertRaises(RuntimeError) as context:
                     _ = _load_model(**self.load_parameters, model_properties=model_properties)
@@ -215,7 +215,7 @@ def test_zip_file(self):
                 patch('os.path.splitext', return_value=('/path/to/file', '.zip')), \
                 patch('os.makedirs') as mock_makedirs, \
                 patch('zipfile.ZipFile') as mock_zipfile, \
-                patch("marqo.s2_inference.hf_utils.download_model", return_value="/path/to/file.zip"),\
+                patch("marqo.core.inference.models.hugging_face_model.download_model", return_value="/path/to/file.zip"),\
                 patch("transformers.AutoModel.from_pretrained") as mock_model,\
                 patch("transformers.AutoTokenizer.from_pretrained") as mock_tokenizer:
 
@@ -237,7 +237,7 @@ def test_tar_file(self):
                 patch('os.path.splitext', return_value=('/path/to/file', '.tar')), \
                 patch('os.makedirs') as mock_makedirs, \
                 patch('tarfile.open') as mock_tarfile,\
-                patch("marqo.s2_inference.hf_utils.download_model", return_value="/path/to/file.tar"), \
+                patch("marqo.core.inference.models.hugging_face_model.download_model", return_value="/path/to/file.tar"), \
                 patch("transformers.AutoModel.from_pretrained") as mock_model, \
                 patch("transformers.AutoTokenizer.from_pretrained") as mock_tokenizer:
 
@@ -256,7 +256,7 @@ def test_tar_file(self):
 
     def test_directory(self):
         with patch('os.path.isfile', return_value=False),\
-                patch("marqo.s2_inference.hf_utils.download_model", return_value="/path/to/file.tar"), \
+                patch("marqo.core.inference.models.hugging_face_model.download_model", return_value="/path/to/file.tar"), \
                 patch("transformers.AutoModel.from_pretrained") as mock_model, \
                 patch("transformers.AutoTokenizer.from_pretrained") as mock_tokenizer:
             self.assertEqual(extract_huggingface_archive('/path/to/directory'), '/path/to/directory')
diff --git a/tests/tensor_search/test_model_auth.py b/tests/tensor_search/test_model_auth.py
index 2a978ab57..d46f46496 100644
--- a/tests/tensor_search/test_model_auth.py
+++ b/tests/tensor_search/test_model_auth.py
@@ -16,7 +16,7 @@
 from marqo.s2_inference.s2_inference import clear_loaded_models
 from transformers import AutoModel, AutoTokenizer
 from marqo.s2_inference.processing.custom_clip_utils import download_pretrained_from_url
-from marqo.s2_inference.hf_utils import extract_huggingface_archive
+from marqo.core.inference.models.hugging_face_model import extract_huggingface_archive
 import os
 from marqo.api.exceptions import BadRequestError, ModelNotInCacheError
 from marqo.tensor_search.models.api_models import BulkSearchQuery, BulkSearchQueryEntity
@@ -1237,7 +1237,7 @@ def test_1_load_model_from_hf_zip_file_with_auth_search(self):
         with unittest.mock.patch('transformers.AutoModel.from_pretrained', mock_automodel_from_pretrained):
             with unittest.mock.patch('transformers.AutoTokenizer.from_pretrained', mock_autotokenizer_from_pretrained):
                 with unittest.mock.patch('marqo.s2_inference.model_downloading.from_hf.hf_hub_download', mock_hf_hub_download):
-                    with unittest.mock.patch("marqo.s2_inference.hf_utils.extract_huggingface_archive", mock_extract_huggingface_archive):
+                    with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.extract_huggingface_archive", mock_extract_huggingface_archive):
                         try:
                             res = tensor_search.search(
                                 config=self.config, text='hello', index_name=self.index_name_1,
@@ -1297,7 +1297,7 @@ def test_2_load_model_from_hf_zip_file_without_auth_search(self):
         with unittest.mock.patch('transformers.AutoModel.from_pretrained', mock_automodel_from_pretrained):
             with unittest.mock.patch('transformers.AutoTokenizer.from_pretrained', mock_autotokenizer_from_pretrained):
                 with unittest.mock.patch('marqo.s2_inference.model_downloading.from_hf.hf_hub_download', mock_hf_hub_download):
-                    with unittest.mock.patch("marqo.s2_inference.hf_utils.extract_huggingface_archive", mock_extract_huggingface_archive):
+                    with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.extract_huggingface_archive", mock_extract_huggingface_archive):
                        try:
                             res = tensor_search.search(
                                 config=self.config, text='hello', index_name=self.index_name_1,)
@@ -1368,7 +1368,7 @@ def test_3_load_model_from_s3_zip_file_with_auth_search(self):
             with unittest.mock.patch('transformers.AutoTokenizer.from_pretrained',mock_autotokenizer_from_pretrained):
                 with unittest.mock.patch('boto3.client', return_value=mock_s3_client) as mock_boto3_client:
                     with unittest.mock.patch("marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_url", mock_download_pretrained_from_url):
-                        with unittest.mock.patch("marqo.s2_inference.hf_utils.extract_huggingface_archive", mock_extract_huggingface_archive):
+                        with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.extract_huggingface_archive", mock_extract_huggingface_archive):
                             try:
                                 res = tensor_search.search(
                                     config=self.config, text='hello', index_name=self.index_name_1,
@@ -1422,7 +1422,7 @@ def test_4_load_model_from_public_url_zip_file_search(self):
         with mock.patch('transformers.AutoModel.from_pretrained', new=mock_automodel_from_pretrained):
             with mock.patch('marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_url',
                             new=mock_download):
-                with mock.patch("marqo.s2_inference.hf_utils.extract_huggingface_archive", new=mock_extract_huggingface_archive):
+                with mock.patch("marqo.core.inference.models.hugging_face_model.extract_huggingface_archive", new=mock_extract_huggingface_archive):
                     res = tensor_search.search(config=self.config, text='hello', index_name=self.index_name_1)
 
         assert len(mock_extract_huggingface_archive.call_args_list) == 1
@@ -1568,7 +1568,7 @@ def test_1_load_model_from_hf_zip_file_with_auth_add_documents(self):
         with unittest.mock.patch('transformers.AutoModel.from_pretrained', mock_automodel_from_pretrained):
             with unittest.mock.patch('transformers.AutoTokenizer.from_pretrained', mock_autotokenizer_from_pretrained):
                 with unittest.mock.patch('marqo.s2_inference.model_downloading.from_hf.hf_hub_download', mock_hf_hub_download):
-                    with unittest.mock.patch("marqo.s2_inference.hf_utils.extract_huggingface_archive", mock_extract_huggingface_archive):
+                    with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.extract_huggingface_archive", mock_extract_huggingface_archive):
                         try:
                             tensor_search.add_documents(config=self.config, add_docs_params=AddDocsParams(
                                 index_name=self.index_name_1, auto_refresh=True, docs=[{'a': 'b'}],
@@ -1628,7 +1628,7 @@ def test_2_load_model_from_hf_zip_file_without_auth_add_documents(self):
         with unittest.mock.patch('transformers.AutoModel.from_pretrained', mock_automodel_from_pretrained):
             with unittest.mock.patch('transformers.AutoTokenizer.from_pretrained', mock_autotokenizer_from_pretrained):
                 with unittest.mock.patch('marqo.s2_inference.model_downloading.from_hf.hf_hub_download', mock_hf_hub_download):
-                    with unittest.mock.patch("marqo.s2_inference.hf_utils.extract_huggingface_archive", mock_extract_huggingface_archive):
+                    with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.extract_huggingface_archive", mock_extract_huggingface_archive):
                         try:
                             tensor_search.add_documents(config=self.config, add_docs_params=AddDocsParams(
                                 index_name=self.index_name_1, auto_refresh=True, docs=[{'a': 'b'}], device="cpu"))
@@ -1699,7 +1699,7 @@ def test_3_load_model_from_s3_zip_file_with_auth_add_documents(self):
                     with unittest.mock.patch(
                             "marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_url",
                             mock_download_pretrained_from_url):
-                        with unittest.mock.patch("marqo.s2_inference.hf_utils.extract_huggingface_archive",
+                        with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.extract_huggingface_archive",
                                                  mock_extract_huggingface_archive):
                             try:
                                 tensor_search.add_documents(config=self.config, add_docs_params=AddDocsParams(
@@ -1756,7 +1756,7 @@ def test_4_load_model_from_public_url_zip_file_add_documents(self):
         with mock.patch('transformers.AutoModel.from_pretrained', new=mock_automodel_from_pretrained):
             with mock.patch('marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_url',
                             new=mock_download):
-                with mock.patch("marqo.s2_inference.hf_utils.extract_huggingface_archive", new=mock_extract_huggingface_archive):
+                with mock.patch("marqo.core.inference.models.hugging_face_model.extract_huggingface_archive", new=mock_extract_huggingface_archive):
                     tensor_search.add_documents(config=self.config, add_docs_params=AddDocsParams(
                         index_name=self.index_name_1, auto_refresh=True, docs=[{'a': 'b'}], device="cpu"))
 
@@ -2477,7 +2477,7 @@ def test_bulk_search(self):
             with unittest.mock.patch('transformers.AutoTokenizer.from_pretrained', mock_autotokenizer_from_pretrained):
                 with unittest.mock.patch('boto3.client', return_value=mock_s3_client) as mock_boto3_client:
                     with unittest.mock.patch("marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_url",mock_download_pretrained_from_url):
-                        with unittest.mock.patch("marqo.s2_inference.hf_utils.extract_huggingface_archive", mock_extract_huggingface_archive):
+                        with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.extract_huggingface_archive", mock_extract_huggingface_archive):
                             try:
                                 tensor_search.bulk_search(
                                     query=bulk_search_query,
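The repeated target-string updates in these two patches follow the standard mock.patch rule: patch the name in the namespace where it is looked up, not where it is defined. Since extract_huggingface_archive moved modules, every test that patches it must point at the new import location. A small illustrative sketch (the worker module is hypothetical, for illustration only):

    # worker.py (hypothetical):
    #     from marqo.core.inference.models.hugging_face_model import extract_huggingface_archive
    #     def run(path):
    #         return extract_huggingface_archive(path)

    from unittest import mock

    import worker

    # Patching "worker.extract_huggingface_archive" intercepts the name that
    # run() actually resolves; patching the defining module after a
    # `from ... import ...` would leave worker's reference untouched.
    with mock.patch("worker.extract_huggingface_archive", return_value="/tmp/model") as m:
        assert worker.run("model.zip") == "/tmp/model"
        m.assert_called_once_with("model.zip")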
From 45c99bc36bd7cf4d832e2ea21a48357f01cbf578 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Tue, 24 Sep 2024 22:33:04 +1000
Subject: [PATCH 29/63] Fix tests

---
 .../s2_inference/model_downloading/from_hf.py |  2 +-
 .../tensor_search/models/private_models.py    | 27 +++----------
 .../test_corrupt_file_error_handling.py       |  4 +--
 tests/s2_inference/test_clip_utils.py         |  4 ++-
 4 files changed, 9 insertions(+), 28 deletions(-)

diff --git a/src/marqo/s2_inference/model_downloading/from_hf.py b/src/marqo/s2_inference/model_downloading/from_hf.py
index 1f68ec746..21c40ac97 100644
--- a/src/marqo/s2_inference/model_downloading/from_hf.py
+++ b/src/marqo/s2_inference/model_downloading/from_hf.py
@@ -2,7 +2,7 @@
 from typing import Optional
 from huggingface_hub import hf_hub_download
 from marqo.s2_inference.logger import get_logger
-from huggingface_hub.errors import RepositoryNotFoundError
+from huggingface_hub.utils import RepositoryNotFoundError
 from marqo.s2_inference.errors import ModelDownloadError
 
 logger = get_logger(__name__)
diff --git a/src/marqo/tensor_search/models/private_models.py b/src/marqo/tensor_search/models/private_models.py
index 429f92fa3..ea3f197ef 100644
--- a/src/marqo/tensor_search/models/private_models.py
+++ b/src/marqo/tensor_search/models/private_models.py
@@ -8,7 +8,7 @@
 from marqo.api.exceptions import InvalidArgError
 from typing import Optional
 from pydantic import Field
-from marqo.base_model import ImmutableBaseModel
+from marqo.base_model import ImmutableBaseModel, MarqoBaseModel
 
 
 class ModelAuth(ImmutableBaseModel):
     """TODO: insert links to docs in error message"""
@@ -35,14 +35,11 @@ def _ensure_exactly_one_auth_method(cls, v, values, field):
         return v
 
 
-class ModelLocation(BaseModel):
-
-    class Config:
-        allow_mutation = True
+class ModelLocation(MarqoBaseModel):
 
     s3: Optional[S3Location] = None
     hf: Optional[HfModelLocation] = None
-    auth_required: bool = Field(default=False, alias="authRequired")
+    auth_required: bool = Field(default=False, alias="authRequired")
 
     @root_validator(skip_on_failure=True)
@@ -58,21 +55,3 @@ def _validate_s3_and_hf(cls, values):
                 "More than one model location object was provided. "
                 "Only one model location object is allowed")
         return values
-    #
-    # @validator('s3', 'hf', pre=True, always=True)
-    # def _ensure_exactly_one_location(cls, v, values, field):
-    #     """TODO: insert links to docs in error message"""
-    #     other_field = 's3' if field.name == 'hf' else 'hf'
-    #     if other_field in values and values[other_field] is not None and v is not None:
-    #         raise InvalidArgError(
-    #             "More than one model location object was provided. "
-    #             "Only one model authentication object is allowed")
-    #     return v
-    #
-    # def __init__(self, **data):
-    #     super().__init__(**data)
-    #     if self.s3 is None and self.hf is None:
-    #         raise InvalidArgError(
-    #             "Missing model location object. A location object, for example `s3` or "
-    #             "`hf`, must be provided. ")
-
diff --git a/tests/s2_inference/model_downloading/test_corrupt_file_error_handling.py b/tests/s2_inference/model_downloading/test_corrupt_file_error_handling.py
index 8e022badc..a1d8e9c27 100644
--- a/tests/s2_inference/model_downloading/test_corrupt_file_error_handling.py
+++ b/tests/s2_inference/model_downloading/test_corrupt_file_error_handling.py
@@ -224,7 +224,7 @@ def test_zip_file(self):
 
             mock_makedirs.assert_called_once_with('/path/to/file', exist_ok=True)
             mock_zipfile.assert_called_once_with('/path/to/file.zip', 'r')
-            mock_model.assert_called_once_with('/path/to/file', use_auth_token=None, cache_dir = ModelCache.hf_cache_path)
+            mock_model.assert_called_once_with('/path/to/file')
             mock_tokenizer.assert_called_once_with('/path/to/file')
 
             mock_makedirs.reset_mock()
@@ -246,7 +246,7 @@ def test_tar_file(self):
 
             mock_makedirs.assert_called_once_with('/path/to/file', exist_ok=True)
             mock_tarfile.assert_called_once_with('/path/to/file.tar', 'r')
-            mock_model.assert_called_once_with('/path/to/file', use_auth_token=None, cache_dir=ModelCache.hf_cache_path)
+            mock_model.assert_called_once_with('/path/to/file')
             mock_tokenizer.assert_called_once_with('/path/to/file')
 
             mock_makedirs.reset_mock()
diff --git a/tests/s2_inference/test_clip_utils.py b/tests/s2_inference/test_clip_utils.py
index 9356bdf8b..b7325732e 100644
--- a/tests/s2_inference/test_clip_utils.py
+++ b/tests/s2_inference/test_clip_utils.py
@@ -106,7 +106,9 @@ class TestDownloadFromRepo(unittest.TestCase):
     def test__download_from_repo_with_auth(self, mock_download_model, ):
         mock_download_model.return_value = 'model.pth'
         location = ModelLocation(
-            s3=S3Location(Bucket='some_bucket', Key='some_key'), auth_required=True)
+            s3=S3Location(Bucket='some_bucket', Key='some_key'),
+            auth_required=True
+        )
         s3_auth = S3Auth(aws_access_key_id='some_key_id', aws_secret_access_key='some_secret')
 
         model_props = {
" diff --git a/src/marqo/logging.py b/src/marqo/logging.py index 261f147a6..85d7d9e11 100644 --- a/src/marqo/logging.py +++ b/src/marqo/logging.py @@ -2,6 +2,7 @@ from marqo.api.exceptions import EnvVarError from marqo.tensor_search.utils import read_env_vars_and_defaults +from marqo_docs import configuring_marqo def get_logger(name): @@ -20,6 +21,6 @@ def get_logger(name): else: raise EnvVarError(f"The provided environment variable `MARQO_LOG_LEVEL` = `{log_level}` is not supported." f"The environment variable `MARQO_LOG_LEVEL` should be one of `error`, `warning`, `info`, `debug`." - f"Check https://docs.marqo.ai/0.0.13/Advanced-Usage/configuration/ for more info.") + f"Check {configuring_marqo()} for more info.") return logger diff --git a/src/marqo/marqo_docs.py b/src/marqo/marqo_docs.py index 4b7099d81..583eaf555 100644 --- a/src/marqo/marqo_docs.py +++ b/src/marqo/marqo_docs.py @@ -14,39 +14,39 @@ def _build_url(path): def configuring_marqo(): - return _build_url('Guides/Advanced-Usage/configuration/') + return _build_url('other-resources/guides/advanced-usage/configuration/') def create_index(): - return _build_url('API-Reference/Indexes/create_index/') + return _build_url('reference/api/indexes/create-index/') def multimodal_combination_object(): - return _build_url('Guides/Advanced-Usage/document_fields/#multimodal-combination-object/') + return _build_url('other-resources/guides/advanced-usage/document-fields/#multimodal-combination-object') def custom_vector_object(): - return _build_url('Guides/Advanced-Usage/document_fields/#custom-vectors/') + return _build_url('other-resources/guides/advanced-usage/document-fields/#custom-vector-object') def mappings(): - return _build_url('API-Reference/Documents/mappings/') + return _build_url('reference/api/documents/mappings/') def map_fields(): - return _build_url('API-Reference/Documents/add_or_replace_documents/#map-fields/') + return _build_url('reference/api/documents/add-or-replace-documents/#map-fields') def list_of_models(): - return _build_url('Guides/Models-Reference/list_of_models/') + return _build_url('models/marqo/list-of-models/') def search_context(): - return _build_url('API-Reference/Search/search/#context') + return _build_url('reference/api/search/search/#context') def configuring_preloaded_models(): - return _build_url('Guides/Advanced-Usage/configuration/#configuring-preloaded-models') + return _build_url('other-resources/guides/advanced-usage/configuration/#configuring-preloaded-models') def bring_your_own_model(): @@ -54,12 +54,18 @@ def bring_your_own_model(): def query_reference(): - return _build_url('API-Reference/Search/search/#query-q') + return _build_url('reference/api/search/search/#query-q') def indexing_images(): - return _build_url('Guides/Advanced-Usage/images/') + return _build_url('other-resources/guides/advanced-usage/images/') def api_reference_document_body(): - return _build_url('API-Reference/Documents/add_or_replace_documents/#body') + return _build_url('reference/api/documents/add-or-replace-documents/#body') + +def generic_models(): + return _build_url('models/marqo/list-of-models/#generic-clip-models') + +def search_api_score_modifiers_parameter(): + return _build_url('reference/api/search/search/#score-modifiers') \ No newline at end of file diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 46d9aa7c1..c9d760cd2 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -377,7 +377,7 @@ def load(self) -> None: 
raise InvalidModelPropertiesError(f"Marqo can not load the custom clip model." f"The provided model path `{path}` is neither a local file nor a valid url." f"Please check your provided model url and retry" - f"Check `https://docs.marqo.ai/0.0.12/Models-Reference/dense_retrieval/` for more info.") + f"Check {marqo_docs.bring_your_own_model()} for more info.") self.jit = self.model_properties.get("jit", False) self.model, self.preprocess = self.custom_clip_load() @@ -510,7 +510,7 @@ def __init__(self, model_type: str = "fp16/ViT-B/32", device: str = None, embedd f"FP16 clip model `{self.model_type}` is only available with device `cuda`.\n" f"With current device `{self.device}`, the model will be loaded in `float32` mode. \n" f"Please check you cuda availability or try the fp32 version `{self.model_type.replace('fp16/', '')}`" - f"Check `https://docs.marqo.ai/0.0.13/Models-Reference/dense_retrieval/#generic-clip-models` for more info.") + f"Check {marqo_docs.generic_models()} for more info.") self.model_name = self.model_type.replace("fp16/", "") diff --git a/src/marqo/s2_inference/processing/image.py b/src/marqo/s2_inference/processing/image.py index cf972242c..c4bd2799f 100644 --- a/src/marqo/s2_inference/processing/image.py +++ b/src/marqo/s2_inference/processing/image.py @@ -213,7 +213,6 @@ def _load_and_cache_model(self): "Request rejected, as this request attempted to load and cache the model, " "but the lock is already held by another operation. " "Please wait for a few seconds and send the request again.\n" - "Marqo's documentation can be found here: `https://docs.marqo.ai/latest/`" ) with _load_model_lock: diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index 0aaeaa312..14af1f370 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -285,9 +285,8 @@ def _update_available_models(model_cache_key: str, model_name: str, validated_mo model_size = get_model_size(model_name, validated_model_properties) if lock.locked(): raise ModelCacheManagementError("Request rejected, as this request attempted to update the model cache, while " - "another request was updating the model cache at the same time.\n " - "Please wait for 10 seconds and send the request again.\n " - "Marqo's documentation can be found here: `https://docs.marqo.ai/latest/`") + "another request was updating the model cache at the same time. " + "Please wait for 10 seconds and send the request again ") with lock: _validate_model_into_device(model_name, validated_model_properties, device, calling_func=_update_available_models.__name__) @@ -483,7 +482,7 @@ def _check_memory_threshold_for_model(device: str, model_size: Union[float, int] f"You are trying to load a model with size = `{model_size}` into device = `{device}`, which is larger than the device threshold = `{threshold}`. " f"Marqo CANNOT find enough space for the model. Please change the threshold by adjusting the environment variables.\n" f"Please modify the threshold by setting the environment variable `MARQO_MAX_CUDA_MODEL_MEMORY` or `MARQO_MAX_CPU_MODEL_MEMORY`." 
- f"You can find more detailed information at `https://docs.marqo.ai/latest/other-resources/guides/advanced-usage/configuration/`.") + f"You can find more detailed information at {marqo_docs.configuring_marqo()}.") return (used_memory + model_size) < threshold diff --git a/src/marqo/tensor_search/models/score_modifiers_object.py b/src/marqo/tensor_search/models/score_modifiers_object.py index d4e70bbd1..713e64162 100644 --- a/src/marqo/tensor_search/models/score_modifiers_object.py +++ b/src/marqo/tensor_search/models/score_modifiers_object.py @@ -5,7 +5,7 @@ from marqo.core.models.score_modifier import ScoreModifierType, ScoreModifier from marqo.api.exceptions import InvalidArgError - +from marqo import marqo_docs class ScoreModifierValidationError(InvalidArgError): def __init__(self, modifier: Dict[str, Any], message: str, link: str = None): @@ -13,7 +13,7 @@ def __init__(self, modifier: Dict[str, Any], message: str, link: str = None): link=link, message=f"Error validating score_modifiers = `{modifier}`. Reason: \n{message} " f"Please revise your score_modifiers based on the provided error." - f"\n Check `https://docs.marqo.ai/0.0.17/API-Reference/search/#score-modifiers` for more info." + f"\n Check {marqo_docs.search_api_score_modifiers_parameter()} for more info." ) diff --git a/src/marqo/tensor_search/tensor_search_logging.py b/src/marqo/tensor_search/tensor_search_logging.py index 12df76207..c319fc31b 100644 --- a/src/marqo/tensor_search/tensor_search_logging.py +++ b/src/marqo/tensor_search/tensor_search_logging.py @@ -1,6 +1,8 @@ import logging from marqo.tensor_search.utils import read_env_vars_and_defaults from marqo.api.exceptions import EnvVarError +from marqo import marqo_docs + def get_logger(name): logging.basicConfig() logger = logging.getLogger(name) @@ -17,7 +19,7 @@ def get_logger(name): else: raise EnvVarError(f"The provided environment variable `MARQO_LOG_LEVEL` = `{log_level}` is not supported." f"The environment variable `MARQO_LOG_LEVEL` should be one of `error`, `warning`, `info`, `debug`." - f"Check https://docs.marqo.ai/0.0.13/Advanced-Usage/configuration/ for more info.") + f"Check {marqo_docs.configuring_marqo()} for more info.") formatter = logging.Formatter( "{asctime} {threadName:>11} {levelname} {message}", style='{') From a0f9cd4bcb1b002842562efd77dd70785f107dfc Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 25 Sep 2024 12:17:30 +1000 Subject: [PATCH 31/63] Fix marqo_docs() --- src/marqo/logging.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/marqo/logging.py b/src/marqo/logging.py index 85d7d9e11..cb9f807ea 100644 --- a/src/marqo/logging.py +++ b/src/marqo/logging.py @@ -1,8 +1,8 @@ import logging +from marqo import marqo_docs from marqo.api.exceptions import EnvVarError from marqo.tensor_search.utils import read_env_vars_and_defaults -from marqo_docs import configuring_marqo def get_logger(name): @@ -21,6 +21,6 @@ def get_logger(name): else: raise EnvVarError(f"The provided environment variable `MARQO_LOG_LEVEL` = `{log_level}` is not supported." f"The environment variable `MARQO_LOG_LEVEL` should be one of `error`, `warning`, `info`, `debug`." 
From a0f9cd4bcb1b002842562efd77dd70785f107dfc Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Wed, 25 Sep 2024 12:17:30 +1000
Subject: [PATCH 31/63] Fix marqo_docs()

---
 src/marqo/logging.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/marqo/logging.py b/src/marqo/logging.py
index 85d7d9e11..cb9f807ea 100644
--- a/src/marqo/logging.py
+++ b/src/marqo/logging.py
@@ -1,8 +1,8 @@
 import logging
 
+from marqo import marqo_docs
 from marqo.api.exceptions import EnvVarError
 from marqo.tensor_search.utils import read_env_vars_and_defaults
-from marqo_docs import configuring_marqo
 
 
 def get_logger(name):
@@ -21,6 +21,6 @@ def get_logger(name):
     else:
         raise EnvVarError(f"The provided environment variable `MARQO_LOG_LEVEL` = `{log_level}` is not supported."
                           f"The environment variable `MARQO_LOG_LEVEL` should be one of `error`, `warning`, `info`, `debug`."
-                          f"Check {configuring_marqo()} for more info.")
+                          f"Check {marqo_docs.configuring_marqo()} for more info.")
 
     return logger

From 6ba797354dbff4a1ab2cbf8a491a49bb8ce62ca7 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Mon, 7 Oct 2024 10:58:39 +1100
Subject: [PATCH 32/63] Fix tests

---
 examples/GPT-examples/utilities.py             |   2 +-
 .../inference/models/abstract_clip_model.py    |   2 +-
 .../core/inference/models/hf_tokenizer.py      |  33 +++
 .../inference/models/hugging_face_model.py     | 176 +++++------
 .../core/inference/models/open_clip_model.py   | 274 ++++++++++++++++++
 .../models/open_clip_model_properties.py       |   2 +-
 src/marqo/core/models/marqo_index.py           |   4 +-
 src/marqo/s2_inference/clip_utils.py           | 254 +---------------
 src/marqo/s2_inference/hf_utils.py             | 257 ----------------
 .../languagebind/audio/modeling_audio.py       |   8 +-
 .../languagebind/audio/tokenization_audio.py   |   4 +-
 .../languagebind/image/modeling_image.py       |   8 +-
 .../languagebind/image/tokenization_image.py   |   4 +-
 .../languagebind/video/modeling_video.py       |   8 +-
 .../languagebind/video/tokenization_video.py   |   4 +-
 src/marqo/s2_inference/model_registry.py       |   3 +-
 .../s2_inference/multimodal_model_load.py      |   2 +-
 .../processing/custom_clip_utils.py            | 170 +----------
 .../s2_inference/processing/yolox_utils.py     |   2 +-
 src/marqo/s2_inference/random_utils.py         |   2 +-
 .../s2_inference/reranking/model_utils.py      |   8 +-
 src/marqo/s2_inference/s2_inference.py         |   3 +-
 .../core/inference/test_hugging_face_model.py  |   2 +-
 .../test_marqo_fashion_clip.py                 |   2 +-
 .../test_open_clip_model_load.py               |   6 +-
 tests/s2_inference/test_clip_utils.py          |   3 +-
 tests/tensor_search/test_model_auth.py         |   2 +-
 tests/tensor_search/test_on_start_script.py    |   2 +-
 28 files changed, 442 insertions(+), 805 deletions(-)
 create mode 100644 src/marqo/core/inference/models/hf_tokenizer.py
 create mode 100644 src/marqo/core/inference/models/open_clip_model.py
 delete mode 100644 src/marqo/s2_inference/hf_utils.py

diff --git a/examples/GPT-examples/utilities.py b/examples/GPT-examples/utilities.py
index 23ae6f0be..0931e40bb 100644
--- a/examples/GPT-examples/utilities.py
+++ b/examples/GPT-examples/utilities.py
@@ -205,7 +205,7 @@ def _lies_between(offset_tuple, offset):
 
 def _find_end_character_mapping(offset_mapping, offset):
     """assumes sorted offset_mapping. unless this was modified
-    this will be the default from the tokenizer
+    this will be the default from the _tokenizer
     """
     # if the max length is bigger we just return the last index
     if offset >= max(offset_mapping[-1]):
diff --git a/src/marqo/core/inference/models/abstract_clip_model.py b/src/marqo/core/inference/models/abstract_clip_model.py
index f03e8efb6..a7297c047 100644
--- a/src/marqo/core/inference/models/abstract_clip_model.py
+++ b/src/marqo/core/inference/models/abstract_clip_model.py
@@ -20,7 +20,7 @@ class AbstractCLIPModel(AbstractEmbeddingModel):
         model_properties (dict): A dictionary containing additional properties or configurations
             specific to the model. Defaults to an empty dictionary if not provided.
         model: The actual CLIP model instance, initialized to `None` and to be set by subclasses.
-        tokenizer: The tokenizer associated with the model, initialized to `None` and to be set by subclasses.
+        tokenizer: The _tokenizer associated with the model, initialized to `None` and to be set by subclasses.
         preprocess: The preprocessing pipeline for the model, initialized to `None` and to be set by subclasses.
     """
diff --git a/src/marqo/core/inference/models/hf_tokenizer.py b/src/marqo/core/inference/models/hf_tokenizer.py
new file mode 100644
index 000000000..7b3d7096c
--- /dev/null
+++ b/src/marqo/core/inference/models/hf_tokenizer.py
@@ -0,0 +1,33 @@
+import html
+from typing import Union, List
+
+import ftfy
+import regex as re
+import torch
+
+
+def whitespace_clean(text):
+    text = re.sub(r'\s+', ' ', text)
+    text = text.strip()
+    return text
+
+def basic_clean(text):
+    text = ftfy.fix_text(text)
+    text = html.unescape(html.unescape(text))
+    return text.strip()
+
+class HFTokenizer:
+    # HuggingFace tokenizer wrapper
+    # Check https://github.com/mlfoundations/open_clip/blob/16e229c596cafaec46a4defaf27e0e30ffcca12d/src/open_clip/tokenizer.py#L188-L201
+    def __init__(self, tokenizer_name: str):
+        from transformers import AutoTokenizer
+        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
+
+    def __call__(self, texts: Union[str, List[str]]) -> torch.Tensor:
+        # same cleaning as for the default tokenizer, except lowercasing
+        # adding lower (for case-sensitive tokenizers) will make it more robust but less sensitive to nuance
+        if isinstance(texts, str):
+            texts = [texts]
+        texts = [whitespace_clean(basic_clean(text)) for text in texts]
+        input_ids = self.tokenizer(texts, return_tensors='pt', padding='max_length', truncation=True).input_ids
+        return input_ids
\ No newline at end of file
self.model_properties.model_location: - self.model, self.tokenizer = self._load_from_zip_file() + self._model, self._tokenizer = self._load_from_zip_file() else: raise InvalidModelPropertiesError( f"Invalid model properties for the 'hf' model. " f"You do not have the necessary information to load the model. " f"Check {marqo_docs.bring_your_own_model()} for more information." ) - self.model = self.model.to(self.device) - self.pooling_func = self._load_pooling_method() - self.model.eval() + self._model = self._model.to(self.device) + self._pooling_func = self._load_pooling_method() + self._model.eval() def _load_from_hugging_face_repo(self) -> Tuple: """Load the model from the Hugging Face model hub based on the repo_id.""" @@ -88,7 +80,7 @@ def _load_from_zip_file(self) -> Tuple: download_dir=ModelCache.hf_cache_path ) - model_dir = extract_huggingface_archive(zip_file_path) + model_dir = self.extract_huggingface_archive(zip_file_path) try: model = AutoModel.from_pretrained(model_dir).to(self.device) tokenizer = AutoTokenizer.from_pretrained(model_dir) @@ -102,9 +94,9 @@ def _load_from_zip_file(self) -> Tuple: def _load_pooling_method(self) -> Callable: """Load the pooling method for the model.""" if self.model_properties.pooling_method == PoolingMethod.Mean: - return _average_pool_func + return self._average_pool_func elif self.model_properties.pooling_method == PoolingMethod.CLS: - return _cls_pool_func + return self._cls_pool_func else: raise ValueError(f"Invalid pooling method: {self.model_properties.pooling_method}") @@ -112,11 +104,10 @@ def encode(self, sentence: Union[str, List[str]], normalize=True, **kwargs) -> U if isinstance(sentence, str): sentence = [sentence] - if self.model is None: + if self._model is None: self.load() - self.model.normalize = normalize - tokenized_texts = self.tokenizer( + tokenized_texts = self._tokenizer( sentence, padding=True, truncation=True, @@ -125,11 +116,11 @@ def encode(self, sentence: Union[str, List[str]], normalize=True, **kwargs) -> U ).to(self.device) with torch.no_grad(): - model_output = self.model(**tokenized_texts) + model_output = self._model(**tokenized_texts) attention_mask = tokenized_texts['attention_mask'] - embeddings = self.pooling_func(model_output, attention_mask) + embeddings = self._pooling_func(model_output, attention_mask) if normalize: embeddings = F.normalize(embeddings, p=2, dim=1) @@ -142,60 +133,71 @@ def _convert_output(self, output): elif self.device.startswith('cuda'): return output.cpu().numpy() -def extract_huggingface_archive(path: str) -> str: - ''' - - This function takes the path as input. The path can must be a string that can be: - 1. A downloaded archive file. This function will extract the model from the archive return the directory path. - 2. A repo_id in huggingface. This function will return the input string directly. - - path: the downloaded model archive path or a repo_id in huggingface - Returns: - The directory path to the model or the repo_id in huggingface - ''' - if os.path.isfile(path): - # if it's a file, check if it's a compressed file - base, ext = os.path.splitext(path) - if ext in ['.bin', '.pt']: - raise InvalidModelPropertiesError(f"Marqo does not support loading Hugging Face SBERT models from the provided single `{ext}` file. " - "Please try to wrap the model in a Hugging Face archive file and try again. 
") - try: - # create a new directory with the same name as the file - new_dir = base - os.makedirs(new_dir, exist_ok=True) - - # extract the compressed file - # If the target directory already exists, it will be overwritten by default without warning. - if ext == '.zip': - with zipfile.ZipFile(path, 'r') as zip_ref: - zip_ref.extractall(new_dir) - else: - with tarfile.open(path, 'r') as tar_ref: - tar_ref.extractall(new_dir) - # return the path to the new directory - return new_dir - except (tarfile.ReadError, zipfile.BadZipfile): + @staticmethod + def _average_pool_func(model_output, attention_mask): + """A pooling function that averages the hidden states of the model.""" + last_hidden = model_output.last_hidden_state.masked_fill(~attention_mask[..., None].bool(), 0.0) + return last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None] + + @staticmethod + def _cls_pool_func(model_output, attention_mask=None): + """A pooling function that extracts the CLS token from the model.""" + return model_output[0][:, 0] + + @staticmethod + def extract_huggingface_archive(path: str) -> str: + ''' + This function takes the path as input. The path can must be a string that can be: + 1. A downloaded archive file. This function will extract the model from the archive return the directory path. + 2. A repo_id in huggingface. This function will return the input string directly. + + path: the downloaded model archive path or a repo_id in huggingface + Returns: + The directory path to the model or the repo_id in huggingface + ''' + if os.path.isfile(path): + # if it's a file, check if it's a compressed file + base, ext = os.path.splitext(path) + if ext in ['.bin', '.pt']: + raise InvalidModelPropertiesError(f"Marqo does not support loading Hugging Face SBERT models from the provided single `{ext}` file. " + "Please try to wrap the model in a Hugging Face archive file and try again. ") try: - os.remove(path) - except Exception as remove_e: - raise RuntimeError( - f"Marqo encountered an error while attempting to delete a corrupted file `{path}`. " - f"Please report this issue on Marqo's Github Repo and replace the problematic Marqo instance with " - f"a new one. \n " - f"Error message: `{str(remove_e)}`" - ) - raise InvalidModelPropertiesError(f'Marqo encountered an error while extracting the compressed model archive from `{path}`.\n ' - f'This is probably because the file is corrupted or the extension `{ext}` is not supported. ' - f'Marqo has removed the corrupted file from the disk.' - f'Please ensure that the file is a valid compressed file and try again.') - # will this error really happen? - except PermissionError: - raise InvalidModelPropertiesError(f'Marqo encountered an error while extracting the compressed model archive from `{path}`. ' - f'This is probably because the Marqo does not have the permission to write to the directory. ' - f'Please check the access permission of Marqo and try again.') - except Exception as e: - raise RuntimeError(f'Marqo encountered an error while extracting the compressed model archive from `{path}`. ' - f'The original error message is `{str(e)}`') - else: - # return the directory path or repo_id directory - return path \ No newline at end of file + # create a new directory with the same name as the file + new_dir = base + os.makedirs(new_dir, exist_ok=True) + + # extract the compressed file + # If the target directory already exists, it will be overwritten by default without warning. 
+    @staticmethod
+    def extract_huggingface_archive(path: str) -> str:
+        '''
+        This function takes the path as input. The path must be a string that is either:
+        1. A downloaded archive file. This function will extract the model from the archive and return the directory path.
+        2. A repo_id in huggingface. This function will return the input string directly.
+
+        path: the downloaded model archive path or a repo_id in huggingface
+        Returns:
+            The directory path to the model or the repo_id in huggingface
+        '''
+        if os.path.isfile(path):
+            # if it's a file, check if it's a compressed file
+            base, ext = os.path.splitext(path)
+            if ext in ['.bin', '.pt']:
+                raise InvalidModelPropertiesError(f"Marqo does not support loading Hugging Face SBERT models from the provided single `{ext}` file. "
+                                                  "Please try to wrap the model in a Hugging Face archive file and try again. ")
+            try:
+                # create a new directory with the same name as the file
+                new_dir = base
+                os.makedirs(new_dir, exist_ok=True)
+
+                # extract the compressed file
+                # If the target directory already exists, it will be overwritten by default without warning.
+                if ext == '.zip':
+                    with zipfile.ZipFile(path, 'r') as zip_ref:
+                        zip_ref.extractall(new_dir)
+                else:
+                    with tarfile.open(path, 'r') as tar_ref:
+                        tar_ref.extractall(new_dir)
+                # return the path to the new directory
+                return new_dir
+            except (tarfile.ReadError, zipfile.BadZipfile):
+                try:
+                    os.remove(path)
+                except Exception as remove_e:
+                    raise RuntimeError(
+                        f"Marqo encountered an error while attempting to delete a corrupted file `{path}`. "
+                        f"Please report this issue on Marqo's Github Repo and replace the problematic Marqo instance with "
+                        f"a new one. \n "
+                        f"Error message: `{str(remove_e)}`"
+                    )
+                raise InvalidModelPropertiesError(f'Marqo encountered an error while extracting the compressed model archive from `{path}`.\n '
+                                                  f'This is probably because the file is corrupted or the extension `{ext}` is not supported. '
+                                                  f'Marqo has removed the corrupted file from the disk.'
+                                                  f'Please ensure that the file is a valid compressed file and try again.')
+            # will this error really happen?
+            except PermissionError:
+                raise InvalidModelPropertiesError(f'Marqo encountered an error while extracting the compressed model archive from `{path}`. '
+                                                  f'This is probably because the Marqo does not have the permission to write to the directory. '
+                                                  f'Please check the access permission of Marqo and try again.')
+            except Exception as e:
+                raise RuntimeError(f'Marqo encountered an error while extracting the compressed model archive from `{path}`. '
+                                   f'The original error message is `{str(e)}`')
+        else:
+            # return the directory path or repo_id directory
+            return path
\ No newline at end of file
diff --git a/src/marqo/core/inference/models/open_clip_model.py b/src/marqo/core/inference/models/open_clip_model.py
new file mode 100644
index 000000000..c8106c169
--- /dev/null
+++ b/src/marqo/core/inference/models/open_clip_model.py
@@ -0,0 +1,274 @@
+import os
+
+import open_clip
+import torch
+from open_clip.pretrained import _pcfg, _slpcfg, _apcfg
+from open_clip.transform import image_transform_v2, PreprocessCfg, merge_preprocess_dict
+from torchvision.transforms import Compose
+
+from marqo import marqo_docs
+from marqo.core.inference.models.abstract_clip_model import AbstractCLIPModel
+from marqo.core.inference.models.open_clip_model_properties import OpenCLIPModelProperties, ImagePreprocessor
+from marqo.s2_inference.configs import ModelCache
+from marqo.s2_inference.errors import InvalidModelPropertiesError
+from marqo.s2_inference.logger import get_logger
+from marqo.core.inference.models.hf_tokenizer import HFTokenizer
+from marqo.core.inference.model_download import download_model
+from marqo.s2_inference.types import *
+from marqo.tensor_search.models.private_models import ModelLocation
+
+logger = get_logger(__name__)
+
+HF_HUB_PREFIX = "hf-hub:"
+MARQO_OPEN_CLIP_REGISTRY_PREFIX = "open_clip/"
+
+
+class OPEN_CLIP(AbstractCLIPModel):
+    def __init__(
+            self,
+            device: Optional[str] = None,
+            model_properties: Optional[Dict] = None,
+            model_auth: Optional[Dict] = None,
+    ) -> None:
+
+        super().__init__(device, model_properties, model_auth)
+
+        # The image preprocess config is built lazily from the model properties when the model is loaded:
+        self.preprocess_config = None
+
+    def _build_model_properties(self, model_properties: dict):
+        return OpenCLIPModelProperties(**model_properties)
+
+    def _load_necessary_components(self) -> None:
+        """Load the open_clip model and tokenizer."""
+        if self.model_properties.url is not None or self.model_properties.model_location is not None:
+            self.model, self.preprocess = self._load_model_and_image_preprocessor_from_checkpoint()
+            self.tokenizer = self._load_tokenizer_from_checkpoint()
+        elif self.model_properties.name.startswith(HF_HUB_PREFIX):
+            self.model, self.preprocess = self._load_model_and_image_preprocessor_from_hf_repo()
+            self.tokenizer = self._load_tokenizer_from_hf_repo()
+        elif self.model_properties.name.startswith(MARQO_OPEN_CLIP_REGISTRY_PREFIX):
+            self.model, self.preprocess = self._load_model_and_image_preprocessor_from_open_clip_repo()
+            self.tokenizer = self._load_tokenizer_from_open_clip_repo()
+        else:
+            raise InvalidModelPropertiesError(
+                f"Marqo cannot load the provided open_clip model. "
+                f"Check {marqo_docs.bring_your_own_model()} "
+                f"for more details on the supported ways to load an open_clip model."
+            )
+        self.model = self.model.to(self.device)
+        self.model.eval()
+
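The dispatch above tries an explicit checkpoint first (URL or model_location), then the two name prefixes. A plain-Python mirror of that order (the helper name and its return labels are hypothetical, for illustration only):

```python
from typing import Optional

HF_HUB_PREFIX = "hf-hub:"
MARQO_OPEN_CLIP_REGISTRY_PREFIX = "open_clip/"

def loading_path(name: str, url: Optional[str] = None, model_location: Optional[object] = None) -> str:
    """Mirrors the dispatch order in _load_necessary_components: a checkpoint wins over either prefix."""
    if url is not None or model_location is not None:
        return "checkpoint"
    if name.startswith(HF_HUB_PREFIX):
        return "hf_hub"
    if name.startswith(MARQO_OPEN_CLIP_REGISTRY_PREFIX):
        return "open_clip_registry"
    raise ValueError("Marqo cannot load the provided open_clip model.")

assert loading_path("open_clip/ViT-B-32/laion2b_s34b_b79k") == "open_clip_registry"
assert loading_path("hf-hub:some-org/some-clip-repo") == "hf_hub"
assert loading_path("ViT-B-32", url="https://example.com/custom.pt") == "checkpoint"
```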
+    def _check_loaded_components(self):
+        """Check if the open_clip model, tokenizer, and image preprocessor are loaded.
+
+        Raises:
+            RuntimeError: If the open_clip model, tokenizer, or image preprocessor is not loaded.
+        """
+        if self.model is None:
+            raise RuntimeError("The open_clip model is not loaded. Please load the model before inference.")
+        if self.tokenizer is None:
+            raise RuntimeError("The open_clip tokenizer is not loaded. Please load the tokenizer before inference.")
+        if self.preprocess is None:
+            raise RuntimeError("The open_clip image preprocessor is not loaded. "
+                               "Please load the image preprocessor before inference.")
+
+    def _load_image_preprocessor(self) -> Callable:
+        return image_transform_v2(self.preprocess_config)
+
+    def _aggregate_image_preprocessor_config(self) -> PreprocessCfg:
+        """Aggregate the image preprocessor configuration for the open_clip model."""
+
+        if self.model_properties.image_preprocessor in [ImagePreprocessor.OpenCLIP, ImagePreprocessor.OpenAI]:
+            base_image_preprocess_config = _pcfg()
+        elif self.model_properties.image_preprocessor in [ImagePreprocessor.SigLIP]:
+            base_image_preprocess_config = _slpcfg()
+        elif self.model_properties.image_preprocessor in [ImagePreprocessor.CLIPA]:
+            base_image_preprocess_config = _apcfg()
+        else:
+            raise ValueError(f"Invalid image preprocessor {self.model_properties.image_preprocessor}")
+
+        aggregated_image_preprocess_config = PreprocessCfg(
+            **merge_preprocess_dict(
+                base_image_preprocess_config, self.model_properties.dict(exclude_none=True)
+            )
+        )
+
+        return aggregated_image_preprocess_config
+
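In other words, `_aggregate_image_preprocessor_config` starts from the preset matching `image_preprocessor` and lets any preprocessing fields set in `model_properties` win. A rough sketch of the merge semantics using plain dicts (field names follow open_clip's `PreprocessCfg`, and the exact keys `merge_preprocess_dict` accepts may differ):

```python
# Base preset for the OpenAI/OpenCLIP preprocessor (the well-known CLIP image stats).
base_cfg = {
    "size": 224,
    "mean": (0.48145466, 0.4578275, 0.40821073),
    "std": (0.26862954, 0.26130258, 0.27577711),
}

# Preprocessing fields the user set in model_properties; dict(exclude_none=True)
# drops everything that was left unset.
user_overrides = {"size": 256, "mean": (0.5, 0.5, 0.5), "std": (0.5, 0.5, 0.5)}

# merge_preprocess_dict-style behaviour: user-supplied values override the preset.
merged_cfg = {**base_cfg, **user_overrides}
assert merged_cfg["size"] == 256 and merged_cfg["mean"] == (0.5, 0.5, 0.5)
```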
+    def _load_model_and_image_preprocessor_from_checkpoint(self) -> Tuple[torch.nn.Module, Compose]:
+        """Load the model and image preprocessor from a checkpoint file.
+
+        The checkpoint file can be provided through a URL or a model_location object.
+        """
+        # Load the image preprocessor
+        if self.model_properties.url and self.model_properties.model_location:
+            raise InvalidModelPropertiesError(
+                "Only one of url, model_location can be specified in 'model_properties' "
+            )
+        elif self.model_properties.model_location:
+            self.model_path = self._download_from_repo()
+        elif self.model_properties.url:
+            self.model_path = download_model(url=self.model_properties.url)
+        else:
+            raise ValueError("The 'url' or 'model_location' is required in 'model_properties' "
+                             "when loading a custom open_clip model through a URL or a model_location object")
+
+        logger.info(f"The name of the custom clip model is {self.model_properties.name}. We use the open_clip loader")
+
+        try:
+            self.preprocess_config = self._aggregate_image_preprocessor_config()
+            preprocess = image_transform_v2(self.preprocess_config, is_train=False)
+            model = open_clip.create_model(
+                model_name=self.model_properties.name,
+                jit=self.model_properties.jit,
+                pretrained=self.model_path,
+                precision=self.model_properties.precision,
+                device=self.device,
+                cache_dir=ModelCache.clip_cache_path
+            )
+            return model, preprocess
+        except Exception as e:
+            if (isinstance(e, RuntimeError) and "The file might be corrupted" in str(e)):
+                try:
+                    os.remove(self.model_path)
+                except Exception as remove_e:
+                    raise RuntimeError(
+                        f"Marqo encountered an error while attempting to delete a corrupted file '{self.model_path}'. "
+                        f"Please report this issue on Marqo's Github Repo and replace the problematic Marqo instance "
+                        f"with a new one. \n "
+                        f"Error message: `{str(remove_e)}`"
+                    )
+                raise InvalidModelPropertiesError(
+                    f"Marqo encountered a corrupted file when loading open_clip file '{self.model_path}'. "
+                    f"Marqo has removed this file from the disk. "
+                    f"Some possible causes are: "
+                    f"1. the file was not a valid open_clip checkpoint, "
+                    f"2. the file was corrupted during download or incompletely downloaded, "
+                    f"3. you may have tried to load a clip model even though model_properties['type'] is set to 'open_clip' "
+                    f"Please check and update your model properties and retry. "
+                    f"You can find more details at {marqo_docs.bring_your_own_model()}")
+            # It is tricky to catch the error when loading a clip model using type = open_clip. Different pytorch versions raise different errors.
+            elif isinstance(e, (AttributeError, RuntimeError)) or (
+                    "This could be because the operator doesn't exist for this backend" in str(e)):
+                raise InvalidModelPropertiesError(
+                    f"Marqo encountered an error when loading custom open_clip model '{self.model_properties.name}' with "
+                    f"model properties = '{self.model_properties.dict()}'. "
+                    f"The error message is {str(e)}. "
+                    f"You may have tried to load a clip model even though model_properties['type'] is set to 'open_clip' "
+                    f"Please check and update your model properties and retry. "
+                    f"You can find more details at {marqo_docs.bring_your_own_model()}"
+                )
+            else:
+                raise RuntimeError(
+                    f"Marqo encountered an error when loading custom open_clip model {self.model_properties.name} with "
+                    f"model properties = {self.model_properties.dict()}. "
+                    f"The error message is {str(e)}. "
+                    f"Please check and update your model properties and retry. "
+                    f"You can find more details at {marqo_docs.bring_your_own_model()}"
+                )
+
+    def _load_model_and_image_preprocessor_from_hf_repo(self) -> Tuple[torch.nn.Module, Compose]:
+        """Load the model and image preprocessor from a hf_repo.
+
+        The hf_repo should be provided in the model properties, and it is a string starting with `hf-hub:`.
+        """
+        model, _, preprocess = open_clip.create_model_and_transforms(
+            model_name=self.model_properties.name,
+            device=self.device,
+            cache_dir=ModelCache.clip_cache_path,
+        )
+        return model, preprocess
+
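The three loaders correspond to three shapes of `model_properties`. Illustrative examples (the repo id and URL below are hypothetical placeholders, not recommended checkpoints):

```python
# 1. Marqo registry name: "open_clip/{architecture}/{pretrained}"; the loader splits on "/"
#    to get architecture "ViT-B-32" and pretrained tag "laion2b_s34b_b79k".
registry_properties = {
    "name": "open_clip/ViT-B-32/laion2b_s34b_b79k",
    "type": "open_clip",
    "dimensions": 512,
}

# 2. Hugging Face hub name: the "hf-hub:" prefix is passed straight through to open_clip.
hf_hub_properties = {
    "name": "hf-hub:some-org/some-clip-repo",  # hypothetical repo id
    "type": "open_clip",
    "dimensions": 512,
}

# 3. Custom checkpoint: "name" selects the architecture, "url" points at the weights.
checkpoint_properties = {
    "name": "ViT-B-32",
    "type": "open_clip",
    "dimensions": 512,
    "url": "https://example.com/path/to/custom-checkpoint.pt",  # hypothetical URL
}
```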
+    def _load_model_and_image_preprocessor_from_open_clip_repo(self) -> Tuple[torch.nn.Module, Compose]:
+        """Load the model and image preprocessor from the marqo model registry.
+
+        The model name should be provided in the model properties, and it is a string starting with `open_clip/`.
+        """
+        architecture = self.model_properties.name.split("/", 3)[1]
+        pretrained = self.model_properties.name.split("/", 3)[2]
+
+        model, _, preprocess = open_clip.create_model_and_transforms(
+            model_name=architecture,
+            pretrained=pretrained,
+            device=self.device,
+            cache_dir=ModelCache.clip_cache_path
+        )
+        return model, preprocess
+
+    def _load_tokenizer_from_checkpoint(self) -> Callable:
+        if not self.model_properties.tokenizer:
+            return open_clip.get_tokenizer(self.model_properties.name)
+        else:
+            logger.info("Custom HFTokenizer is provided. Loading...")
+            return HFTokenizer(self.model_properties.tokenizer)
+
+    def _load_tokenizer_from_hf_repo(self) -> Callable:
+        return open_clip.get_tokenizer(self.model_properties.name)
+
+    def _load_tokenizer_from_open_clip_repo(self) -> Callable:
+        return open_clip.get_tokenizer(self.model_properties.name.split("/", 3)[1])
+
+    def _download_from_repo(self):
+        """Downloads model from an external repo like s3 and returns the filepath
+
+        Returns:
+            The model's filepath
+
+        Raises:
+            RunTimeError if an empty filepath is detected. This is important
+            because OpenCLIP will instantiate a model with random weights, if
+            a filepath isn't specified, and the model isn't a publicly
+            available HF or OpenAI one.
+        """
+        model_location: ModelLocation = self.model_properties.model_location
+        download_model_params = {"repo_location": model_location}
+
+        if model_location.auth_required:
+            download_model_params['auth'] = self.model_properties.model_auth
+
+        model_file_path = download_model(**download_model_params)
+        if model_file_path is None or model_file_path == '':
+            raise RuntimeError(
+                'download_model() needs to return a valid filepath to the model! Instead, received '
+                f' filepath `{model_file_path}`')
+        return model_file_path
+
+    def encode_image(self, images: Union[str, ImageType, List[Union[str, ImageType]]],
+                     image_download_headers: Optional[Dict] = None,
+                     normalize=True) -> FloatTensor:
+
+        self.image_input_processed: Tensor = self._preprocess_images(images, image_download_headers)
+
+        with torch.no_grad():
+            if self.device.startswith("cuda"):
+                with torch.cuda.amp.autocast():
+                    outputs = self.model.encode_image(self.image_input_processed).to(torch.float32)
+            else:
+                outputs = self.model.encode_image(self.image_input_processed).to(torch.float32)
+
+        if normalize:
+            _shape_before = outputs.shape
+            outputs /= self.normalize(outputs)
+            assert outputs.shape == _shape_before
+        return self._convert_output(outputs)
+
+    def encode_text(self, sentence: Union[str, List[str]], normalize=True) -> FloatTensor:
+        if self.model is None:
+            self.load()
+
+        text = self.tokenizer(sentence).to(self.device)
+
+        with torch.no_grad():
+            if self.device.startswith("cuda"):
+                with torch.cuda.amp.autocast():
+                    outputs = self.model.encode_text(text).to(torch.float32)
+            else:
+                outputs = self.model.encode_text(text).to(torch.float32)
+
+        if normalize:
+            _shape_before = outputs.shape
+            outputs /= self.normalize(outputs)
+            assert outputs.shape == _shape_before
+
+        return self._convert_output(outputs)
\ No newline at end of file
diff --git a/src/marqo/core/inference/models/open_clip_model_properties.py b/src/marqo/core/inference/models/open_clip_model_properties.py
index 7b1d44d01..53200a47a 100644
--- a/src/marqo/core/inference/models/open_clip_model_properties.py
+++ b/src/marqo/core/inference/models/open_clip_model_properties.py
@@ -34,7 +34,7 @@ class OpenCLIPModelProperties(MarqoBaseModel):
     precision: The precision of the model. It should be either 'fp32' or 'fp16'. 
url: The URL of the model checkpoint. It is optional. model_location: The location of the model. It is optional. - tokenizer: The name of the tokenizer. It is optional. + tokenizer: The name of the _tokenizer. It is optional. model_auth: The authentication information for the model. It is optional. image_preprocessor: The image preprocessor used by the model. It should be one of the values in the ImagePreprocessor enum. diff --git a/src/marqo/core/models/marqo_index.py b/src/marqo/core/models/marqo_index.py index 38c6f3ee2..8410410e7 100644 --- a/src/marqo/core/models/marqo_index.py +++ b/src/marqo/core/models/marqo_index.py @@ -209,7 +209,7 @@ def get_text_query_prefix(self, request_level_prefix: Optional[str] = None) -> s return request_level_prefix # For backwards compatibility. Since older versions of Marqo did not have a text_query_prefix field, - # we need to return an empty string if the model does not have a text_query_prefix. + # we need to return an empty string if the model does not have a text_query_prefix. # We know that the value of text_query_prefix is None in old indexes since the model was not populated # from the registry. if self.text_query_prefix is None: @@ -223,7 +223,7 @@ def get_text_chunk_prefix(self, request_level_prefix: Optional[str] = None) -> s return request_level_prefix # For backwards compatibility. Since older versions of Marqo did not have a text_chunk_prefix field, - # we need to return an empty string if the model does not have a text_chunk_prefix. + # we need to return an empty string if the model does not have a text_chunk_prefix. # We know that the value of text_chunk_prefix is None in old indexes since the model was not populated # from the registry. if self.text_chunk_prefix is None: diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index c9d760cd2..bb0fb285c 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -26,7 +26,8 @@ from marqo.s2_inference.configs import ModelCache from marqo.s2_inference.errors import InvalidModelPropertiesError, ImageDownloadError from marqo.s2_inference.logger import get_logger -from marqo.s2_inference.processing.custom_clip_utils import HFTokenizer, download_model +from marqo.core.inference.models.hf_tokenizer import HFTokenizer +from marqo.core.inference.model_download import download_model from marqo.s2_inference.types import * from marqo.tensor_search.enums import ModelProperties, InferenceParams from marqo.tensor_search.models.private_models import ModelLocation @@ -523,257 +524,6 @@ def load(self) -> None: self.model.eval() -class OPEN_CLIP(AbstractCLIPModel): - def __init__( - self, - device: Optional[str] = None, - model_properties: Optional[Dict] = None, - model_auth: Optional[Dict] = None, - ) -> None: - - super().__init__(device, model_properties, model_auth) - - # model_auth gets passed through add_docs and search requests: - self.preprocess_config = None - - def _build_model_properties(self, model_properties: dict): - return OpenCLIPModelProperties(**model_properties) - - def _load_necessary_components(self) -> None: - """Load the open_clip model and tokenizer.""" - if self.model_properties.url is not None or self.model_properties.model_location is not None: - self.model, self.preprocess = self._load_model_and_image_preprocessor_from_checkpoint() - self.tokenizer = self._load_tokenizer_from_checkpoint() - elif self.model_properties.name.startswith(HF_HUB_PREFIX): - self.model, self.preprocess = 
self._load_model_and_image_preprocessor_from_hf_repo() - self.tokenizer = self._load_tokenizer_from_hf_repo() - elif self.model_properties.name.startswith(MARQO_OPEN_CLIP_REGISTRY_PREFIX): - self.model, self.preprocess = self._load_model_and_image_preprocessor_from_open_clip_repo() - self.tokenizer = self._load_tokenizer_from_open_clip_repo() - else: - raise InvalidModelPropertiesError( - f"Marqo cannot load the provided open_clip model. " - f"Check {marqo_docs.bring_your_own_model()} " - f"for more details on the supported methods to open_clip model " - ) - self.model = self.model.to(self.device) - self.model.eval() - - def _check_loaded_components(self): - """Check if the open_clip model, tokenizer, and image preprocessor are loaded. - - Raises: - RuntimeError: If the open_clip model, tokenizer, or image preprocessor is not loaded. - """ - if self.model is None: - raise RuntimeError("The open_clip model is not loaded. Please load the model before inference.") - if self.tokenizer is None: - raise RuntimeError("The open_clip tokenizer is not loaded. Please load the tokenizer before inference.") - if self.preprocess is None: - raise RuntimeError("The open_clip image preprocessor is not loaded. " - "Please load the image preprocessor before inference.") - - def _load_image_preprocessor(self) -> Callable: - return image_transform_v2(self.preprocess_config) - - def _aggregate_image_preprocessor_config(self) -> PreprocessCfg: - """Aggregate the image preprocessor configuration for the open_clip model.""" - - if self.model_properties.image_preprocessor in [ImagePreprocessor.OpenCLIP, ImagePreprocessor.OpenAI]: - base_image_preprocess_config = _pcfg() - elif self.model_properties.image_preprocessor in [ImagePreprocessor.SigLIP]: - base_image_preprocess_config = _slpcfg() - elif self.model_properties.image_preprocessor in [ImagePreprocessor.CLIPA]: - base_image_preprocess_config = _apcfg() - else: - raise ValueError(f"Invalid image preprocessor {self.model_properties.image_preprocessor}") - - aggregated_image_preprocess_config = PreprocessCfg( - **merge_preprocess_dict( - base_image_preprocess_config, self.model_properties.dict(exclude_none=True) - ) - ) - - return aggregated_image_preprocess_config - - def _load_model_and_image_preprocessor_from_checkpoint(self) -> Tuple[torch.nn.Module, Compose]: - """Load the model and image preprocessor from a checkpoint file. - - The checkpoint file can be provided through a URL or a model_location object. - """ - # Load the image preprocessor - if self.model_properties.url and self.model_properties.model_location: - raise InvalidModelPropertiesError( - "Only one of url, model_location can be specified in 'model_properties' " - ) - elif self.model_properties.model_location: - self.model_path = self._download_from_repo() - elif self.model_properties.url: - self.model_path = download_model(url=self.model_properties.url) - else: - raise ValueError("The 'url' or 'model_location' is required in 'model_properties' " - "when loading a custom open_clip model through a URL or a model_location object") - - logger.info(f"The name of the custom clip model is {self.model_properties.name}. 
We use open_clip loader") - - try: - self.preprocess_config = self._aggregate_image_preprocessor_config() - preprocess = image_transform_v2(self.preprocess_config, is_train=False) - model = open_clip.create_model( - model_name=self.model_properties.name, - jit=self.model_properties.jit, - pretrained=self.model_path, - precision=self.model_properties.precision, - device=self.device, - cache_dir=ModelCache.clip_cache_path - ) - return model, preprocess - except Exception as e: - if (isinstance(e, RuntimeError) and "The file might be corrupted" in str(e)): - try: - os.remove(self.model_path) - except Exception as remove_e: - raise RuntimeError( - f"Marqo encountered an error while attempting to delete a corrupted file '{self.model_path}'. " - f"Please report this issue on Marqo's Github Repo and replace the problematic Marqo instance " - f"with a new one. \n " - f"Error message: `{str(remove_e)}`" - ) - raise InvalidModelPropertiesError( - f"Marqo encountered a corrupted file when loading open_clip file '{self.model_path}'. " - f"Marqo has removed this file from the disk. " - f"Some possible causes are: " - f"1. the file was not a valid open_clip checkpoint, " - f"2. the file was corrupted during download or incompletely downloaded, " - f"3. you may have tried to load a clip model even though model_properties['type'] is set to 'open_clip' " - f"Please check and update your model properties and retry. " - f"You can find more details at {marqo_docs.bring_your_own_model()}") - # It is tricky to cacth the error when loading clip model using type = open_clip. Different pytorch version will raise different error. - elif isinstance(e, (AttributeError, RuntimeError)) or ( - "This could be because the operator doesn't exist for this backend" in str(e)): - raise InvalidModelPropertiesError( - f"Marqo encountered an error when loading custom open_clip model '{self.model_properties.name}' with " - f"model properties = '{self.model_properties.dict()}'. " - f"The error message is {str(e)}. " - f"You may have tried to load a clip model even though model_properties['type'] is set to 'open_clip' " - f"Please check and update your model properties and retry. " - f"You can find more details at {marqo_docs.bring_your_own_model()}" - ) - else: - raise RuntimeError( - f"Marqo encountered an error when loading custom open_clip model {self.model_properties.name} with " - f"model properties = {self.model_properties.dict()}. " - f"The error message is {str(e)}. " - f"Please check and update your model properties and retry. " - f"You can find more details at {marqo_docs.bring_your_own_model()}" - ) - - def _load_model_and_image_preprocessor_from_hf_repo(self) -> Tuple[torch.nn.Module, Compose]: - """Load the model and image preprocessor from a hf_repo. - - The hf_repo should be provided in the model properties, and it is a string starting with `hf-hub:`. - """ - model, _, preprocess = open_clip.create_model_and_transforms( - model_name=self.model_properties.name, - device=self.device, - cache_dir=ModelCache.clip_cache_path, - ) - return model, preprocess - - def _load_model_and_image_preprocessor_from_open_clip_repo(self) -> Tuple[torch.nn.Module, Compose]: - """Load the model and image preprocessor from the marqo model registry. - - The model name should be provided in the model properties, and it is a string starting with `open_clip/`. 
- """ - architecture = self.model_properties.name.split("/", 3)[1] - pretrained = self.model_properties.name.split("/", 3)[2] - - model, _, preprocess = open_clip.create_model_and_transforms( - model_name=architecture, - pretrained=pretrained, - device=self.device, - cache_dir=ModelCache.clip_cache_path - ) - return model, preprocess - - def _load_tokenizer_from_checkpoint(self) -> Callable: - if not self.model_properties.tokenizer: - return open_clip.get_tokenizer(self.model_properties.name) - else: - logger.info(f"Custom HFTokenizer is provided. Loading...") - return HFTokenizer(self.model_properties.tokenizer) - - def _load_tokenizer_from_hf_repo(self) -> Callable: - return open_clip.get_tokenizer(self.model_properties.name) - - def _load_tokenizer_from_open_clip_repo(self) -> Callable: - return open_clip.get_tokenizer(self.model_properties.name.split("/", 3)[1]) - - def _download_from_repo(self): - """Downloads model from an external repo like s3 and returns the filepath - - Returns: - The model's filepath - - Raises: - RunTimeError if an empty filepath is detected. This is important - because OpenCLIP will instantiate a model with random weights, if - a filepath isn't specified, and the model isn't a publicly - available HF or OpenAI one. - """ - model_location: ModelLocation = self.model_properties.model_location - download_model_params = {"repo_location": model_location} - - if model_location.auth_required: - download_model_params['auth'] = self.model_properties.model_auth - - model_file_path = download_model(**download_model_params) - if model_file_path is None or model_file_path == '': - raise RuntimeError( - 'download_model() needs to return a valid filepath to the model! Instead, received ' - f' filepath `{model_file_path}`') - return model_file_path - - def encode_image(self, images: Union[str, ImageType, List[Union[str, ImageType]]], - image_download_headers: Optional[Dict] = None, - normalize=True) -> FloatTensor: - - self.image_input_processed: Tensor = self._preprocess_images(images, image_download_headers) - - with torch.no_grad(): - if self.device.startswith("cuda"): - with torch.cuda.amp.autocast(): - outputs = self.model.encode_image(self.image_input_processed).to(torch.float32) - else: - outputs = self.model.encode_image(self.image_input_processed).to(torch.float32) - - if normalize: - _shape_before = outputs.shape - outputs /= self.normalize(outputs) - assert outputs.shape == _shape_before - return self._convert_output(outputs) - - def encode_text(self, sentence: Union[str, List[str]], normalize=True) -> FloatTensor: - if self.model is None: - self.load() - - text = self.tokenizer(sentence).to(self.device) - - with torch.no_grad(): - if self.device.startswith("cuda"): - with torch.cuda.amp.autocast(): - outputs = self.model.encode_text(text).to(torch.float32) - else: - outputs = self.model.encode_text(text).to(torch.float32) - - if normalize: - _shape_before = outputs.shape - outputs /= self.normalize(outputs) - assert outputs.shape == _shape_before - - return self._convert_output(outputs) - - class MULTILINGUAL_CLIP(CLIP): def __init__(self, model_type: str = "multilingual-clip/ViT-L/14", device: str = None, embedding_dim: int = None, truncate: bool = True, **kwargs) -> None: diff --git a/src/marqo/s2_inference/hf_utils.py b/src/marqo/s2_inference/hf_utils.py deleted file mode 100644 index 8eeadcbf4..000000000 --- a/src/marqo/s2_inference/hf_utils.py +++ /dev/null @@ -1,257 +0,0 @@ -# import os, validators -# import zipfile, tarfile -# from urllib.error import 
HTTPError -# import numpy as np -# from typing import Optional -# import torch -# from torch import nn -# from transformers import (AutoModel, AutoTokenizer) -# from marqo.tensor_search.models.private_models import ModelLocation, ModelAuth -# from marqo.tensor_search.enums import ModelProperties, InferenceParams -# from marqo.s2_inference.sbert_utils import Model -# from marqo.s2_inference.types import Union, FloatTensor, List -# from marqo.s2_inference.logger import get_logger -# from marqo.tensor_search.enums import ModelProperties -# from marqo.s2_inference.errors import InvalidModelPropertiesError, ModelDownloadError -# from marqo.s2_inference.processing.custom_clip_utils import download_model -# from marqo.s2_inference.configs import ModelCache -# -# -# -# logger = get_logger(__name__) -# -# -# class HF_MODEL(Model): -# -# def __init__(self, *args, **kwargs) -> None: -# super().__init__(*args, **kwargs) -# -# if self.max_seq_length is None: -# self.max_seq_length = 128 -# self.model_properties = kwargs.get("model_properties", dict()) -# self.model_name = self.model_properties.get("name", None) -# self.model_auth = kwargs.get(InferenceParams.model_auth, None) -# -# def load(self) -> None: -# -# model_location_presence = ModelProperties.model_location in self.model_properties -# path = self.model_properties.get("localpath", None) or self.model_properties.get("url", None) -# # HF models can be loaded from 3 entries: path (url or localpath), model_name, or model_location -# if (path is not None) + (self.model_name is not None) + (model_location_presence is True) != 1: -# raise InvalidModelPropertiesError("Exactly one of (`localpath`/`url`) or `model_location`, `name` can be specified" -# " in `model_properties` for `hf` models as they conflict with each other in model loading." -# " Please ensure that exactly one of these is specified in `model_properties` and retry.") -# elif path is not None: -# if validators.url(path) is True: -# self.model_path = download_model(url = path, download_dir=ModelCache.hf_cache_path) -# elif os.path.isdir(path) or os.path.isfile(path): -# self.model_path = path -# elif self.model_name is not None: -# # Loading from structured huggingface repo directly, token is required directly -# self.model_path = self.model_name -# elif model_location_presence is True: -# # This is a special case for huggingface models, where we can load a model directory from a repo -# if ("hf" in self.model_properties["model_location"]) and ("repo_id" in self.model_properties["model_location"]["hf"]) and \ -# ("filename" not in self.model_properties["model_location"]["hf"]): -# return self._load_from_private_hf_repo() -# else: -# self.model_path = self._download_from_repo() -# -# # We need to do extraction here if necessary -# self.model_path = extract_huggingface_archive(self.model_path) -# -# self.model = AutoModelForSentenceEmbedding(self.model_path).to(self.device) -# try: -# self.tokenizer = AutoTokenizer.from_pretrained(self.model_path) -# except (OSError, ValueError, RuntimeError) as e: -# raise InvalidModelPropertiesError( -# f"Marqo encountered an error loading the Hugging Face model = `{self.model_path}` using AutoTokenizer " -# f"Please ensure that the model is a valid Hugging Face model and retry.\n" -# f" Original error message = {e}") -# except (HTTPError, ConnectionError) as e: -# raise ModelDownloadError( -# f"Marqo encountered an ConnectionError loading the Hugging Face model = `{self.model_path}` using AutoTokenizer. 
" -# f"This is likely to be caused by an internet issue. Please check Marqo's internet connection to Hugging Face and retry. \n" -# f" Original error message = {e}") -# -# def _load_from_private_hf_repo(self) -> None: -# """ -# Load a private model from a huggingface repo directly using the `repo_id` attribute in `model_properties` -# This is a special case for HF models, where we can load a model directory from a repo. -# The self.model_path will be set to the repo_id, which is the remote path in the HuggingFace repo. -# Token is also used if provided in `model_auth` object. -# """ -# model_location = ModelLocation(**self.model_properties[ModelProperties.model_location]) -# self.model_path = model_location.hf.repo_id -# -# token = None -# if model_location.auth_required: -# try: -# token = self.model_auth.hf.token -# except AttributeError: -# raise InvalidModelPropertiesError("Please ensure that `model_auth` is valid for a private Hugging Face model and retry. " -# "A valid `ModelAuth` object should consist a `hugging face token` attribute for private hf repo models") -# -# self.model = AutoModelForSentenceEmbedding(model_name=self.model_path, use_auth_token=token).to(self.device) -# try: -# self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, use_auth_token=token) -# except (OSError, ValueError, RuntimeError) as e: -# raise InvalidModelPropertiesError(f"Marqo encounterend an error loading the Hugging Face model = `{self.model_path}` using AutoTokenizer " -# f"Please ensure that the model is a valid Hugging Face model, the token is correct, and retry\n" -# f" Original error message = {e}") -# except (HTTPError, ConnectionError) as e: -# raise ModelDownloadError(f"Marqo encounters ConnectionError loading the Hugging Face model = `{self.model_path}` using AutoTokenizer. " -# f"This is likely to be caused by an internet issue. Please check Marqo's internet connection to Hugging Face and retry. \n" -# f" Original error message = {e}") -# -# def _download_from_repo(self) -> str: -# """Downloads model from an external repo like s3 and returns the filepath -# -# Returns: -# The model's filepath or a string of hugging face repo name -# -# Raises: -# RunTimeError if an empty filepath is detected. -# """ -# model_location = ModelLocation(**self.model_properties[ModelProperties.model_location]) -# download_model_params = {"repo_location": model_location} -# -# if model_location.auth_required: -# download_model_params['auth'] = self.model_auth -# -# model_file_path = download_model(**download_model_params, download_dir=ModelCache.hf_cache_path) -# if model_file_path is None or model_file_path == '': -# raise RuntimeError( -# 'download_model() needs to return a valid filepath to the model! 
Instead, received ' -# f' filepath `{model_file_path}`') -# return model_file_path -# -# def encode(self, sentence: Union[str, List[str]], normalize=True, **kwargs) -> Union[FloatTensor, np.ndarray]: -# -# if isinstance(sentence, str): -# sentence = [sentence] -# -# if self.model is None: -# self.load() -# -# self.model.normalize = normalize -# inputs = self.tokenizer(sentence, padding=True, truncation=True, max_length=self.max_seq_length, -# return_tensors="pt").to(self.device) -# -# with torch.no_grad(): -# return self._convert_output(self.model.forward(**inputs)) -# -# def _convert_output(self, output): -# if self.device == 'cpu': -# return output.numpy() -# elif self.device.startswith('cuda'): -# return output.cpu().numpy() -# -# -# class AutoModelForSentenceEmbedding(nn.Module): -# -# def __init__(self, model_name: Optional[str] = None, use_auth_token: Optional[str] = None, normalize=True, pooling='mean'): -# super().__init__() -# self.model_name = model_name -# self.normalize = normalize -# self.pooling = pooling -# try: -# self.model = AutoModel.from_pretrained(model_name, use_auth_token = use_auth_token, cache_dir=ModelCache.hf_cache_path) -# except (OSError, ValueError, RuntimeError) as e: -# raise InvalidModelPropertiesError( -# f"Marqo encounters error loading the Hugging Face model = `{self.model_path}` using AutoModel " -# f"Please ensure that the model is a valid Hugging Face model and retry.\n" -# f" Original error message = {e}") -# except (HTTPError, ConnectionError) as e: -# raise ModelDownloadError( -# f"Marqo encounters ConnectionError loading the Hugging Face model = `{self.model_path}` using AutoModel. " -# f"This is likely to be caused by an internet issue. Please check Marqo's internet connection with Hugging Face and retry. \n" -# f" Original error message = {e}") -# self.model.eval() -# if self.pooling == 'mean': -# self._pool_func = self.mean_pooling -# elif self.pooling == 'cls': -# self._pool_func = self.cls_pooling -# else: -# raise TypeError(f"{pooling} not in allowed pooling types of 'mean' or 'cls' ") -# -# def forward(self, **kwargs): -# -# model_output = self.model(**kwargs) -# -# embeddings = self._pool_func(model_output, kwargs['attention_mask']) -# -# if self.normalize: -# return nn.functional.normalize(embeddings, p=2, dim=1) -# -# return embeddings -# -# def mean_pooling(self, model_output, attention_mask): -# -# token_embeddings = model_output[0] -# -# input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() -# -# return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9) -# -# def cls_pooling(self, model_output, attention_mask): -# return model_output[0][:, 0] -# -# -# def extract_huggingface_archive(path: str) -> str: -# ''' -# -# This function takes the path as input. The path can must be a string that can be: -# 1. A downloaded archive file. This function will extract the model from the archive return the directory path. -# 2. A repo_id in huggingface. This function will return the input string directly. -# -# path: the downloaded model archive path or a repo_id in huggingface -# Returns: -# The directory path to the model or the repo_id in huggingface -# ''' -# if os.path.isfile(path): -# # if it's a file, check if it's a compressed file -# base, ext = os.path.splitext(path) -# if ext in ['.bin', '.pt']: -# raise InvalidModelPropertiesError(f"Marqo does not support loading Hugging Face SBERT models from the provided single `{ext}` file. 
" -# "Please try to wrap the model in a Hugging Face archive file and try again. ") -# try: -# # create a new directory with the same name as the file -# new_dir = base -# os.makedirs(new_dir, exist_ok=True) -# -# # extract the compressed file -# # If the target directory already exists, it will be overwritten by default without warning. -# if ext == '.zip': -# with zipfile.ZipFile(path, 'r') as zip_ref: -# zip_ref.extractall(new_dir) -# else: -# with tarfile.open(path, 'r') as tar_ref: -# tar_ref.extractall(new_dir) -# # return the path to the new directory -# return new_dir -# except (tarfile.ReadError, zipfile.BadZipfile): -# try: -# os.remove(path) -# except Exception as remove_e: -# raise RuntimeError( -# f"Marqo encountered an error while attempting to delete a corrupted file `{path}`. " -# f"Please report this issue on Marqo's Github Repo and replace the problematic Marqo instance with " -# f"a new one. \n " -# f"Error message: `{str(remove_e)}`" -# ) -# raise InvalidModelPropertiesError(f'Marqo encountered an error while extracting the compressed model archive from `{path}`.\n ' -# f'This is probably because the file is corrupted or the extension `{ext}` is not supported. ' -# f'Marqo has removed the corrupted file from the disk.' -# f'Please ensure that the file is a valid compressed file and try again.') -# # will this error really happen? -# except PermissionError: -# raise InvalidModelPropertiesError(f'Marqo encountered an error while extracting the compressed model archive from `{path}`. ' -# f'This is probably because the Marqo does not have the permission to write to the directory. ' -# f'Please check the access permission of Marqo and try again.') -# except Exception as e: -# raise RuntimeError(f'Marqo encountered an error while extracting the compressed model archive from `{path}`. 
' -# f'The original error message is `{str(e)}`') -# else: -# # return the directory path or repo_id directory -# return path diff --git a/src/marqo/s2_inference/languagebind/audio/modeling_audio.py b/src/marqo/s2_inference/languagebind/audio/modeling_audio.py index e95dce99f..a34eb4d84 100644 --- a/src/marqo/s2_inference/languagebind/audio/modeling_audio.py +++ b/src/marqo/s2_inference/languagebind/audio/modeling_audio.py @@ -569,9 +569,9 @@ def forward( >>> from transformers import AutoTokenizer, CLIPTextModel >>> model = CLIPTextModel.from_pretrained("openai/clip-vit-base-patch32") - >>> tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32") + >>> _tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32") - >>> inputs = tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="pt") + >>> inputs = _tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="pt") >>> outputs = model(**inputs) >>> last_hidden_state = outputs.last_hidden_state @@ -833,9 +833,9 @@ def get_text_features( >>> from transformers import AutoTokenizer, CLIPModel >>> model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32") - >>> tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32") + >>> _tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32") - >>> inputs = tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="pt") + >>> inputs = _tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="pt") >>> text_features = model.get_text_features(**inputs) ```""" # Use CLIP model's config for some fields (if specified) instead of those of vision & text components. diff --git a/src/marqo/s2_inference/languagebind/audio/tokenization_audio.py b/src/marqo/s2_inference/languagebind/audio/tokenization_audio.py index 4e3edfddd..a7c083c9e 100644 --- a/src/marqo/s2_inference/languagebind/audio/tokenization_audio.py +++ b/src/marqo/s2_inference/languagebind/audio/tokenization_audio.py @@ -28,9 +28,9 @@ class LanguageBindAudioTokenizer(CLIPTokenizer): """ - Construct a CLIP tokenizer. Based on byte-level Byte-Pair-Encoding. + Construct a CLIP _tokenizer. Based on byte-level Byte-Pair-Encoding. - This tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods. Users should refer to + This _tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods. Users should refer to this superclass for more information regarding those methods. 
Args: diff --git a/src/marqo/s2_inference/languagebind/image/modeling_image.py b/src/marqo/s2_inference/languagebind/image/modeling_image.py index ba953212e..769418a40 100644 --- a/src/marqo/s2_inference/languagebind/image/modeling_image.py +++ b/src/marqo/s2_inference/languagebind/image/modeling_image.py @@ -569,9 +569,9 @@ def forward( >>> from transformers import AutoTokenizer, CLIPTextModel >>> model = CLIPTextModel.from_pretrained("openai/clip-vit-base-patch32") - >>> tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32") + >>> _tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32") - >>> inputs = tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="pt") + >>> inputs = _tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="pt") >>> outputs = model(**inputs) >>> last_hidden_state = outputs.last_hidden_state @@ -833,9 +833,9 @@ def get_text_features( >>> from transformers import AutoTokenizer, CLIPModel >>> model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32") - >>> tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32") + >>> _tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32") - >>> inputs = tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="pt") + >>> inputs = _tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="pt") >>> text_features = model.get_text_features(**inputs) ```""" # Use CLIP model's config for some fields (if specified) instead of those of vision & text components. diff --git a/src/marqo/s2_inference/languagebind/image/tokenization_image.py b/src/marqo/s2_inference/languagebind/image/tokenization_image.py index adbf51263..7572d824b 100644 --- a/src/marqo/s2_inference/languagebind/image/tokenization_image.py +++ b/src/marqo/s2_inference/languagebind/image/tokenization_image.py @@ -28,9 +28,9 @@ class LanguageBindImageTokenizer(CLIPTokenizer): """ - Construct a CLIP tokenizer. Based on byte-level Byte-Pair-Encoding. + Construct a CLIP _tokenizer. Based on byte-level Byte-Pair-Encoding. - This tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods. Users should refer to + This _tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods. Users should refer to this superclass for more information regarding those methods. 
Args: diff --git a/src/marqo/s2_inference/languagebind/video/modeling_video.py b/src/marqo/s2_inference/languagebind/video/modeling_video.py index 7042a6dd8..51f162fd3 100644 --- a/src/marqo/s2_inference/languagebind/video/modeling_video.py +++ b/src/marqo/s2_inference/languagebind/video/modeling_video.py @@ -675,9 +675,9 @@ def forward( >>> from transformers import AutoTokenizer, CLIPTextModel >>> model = CLIPTextModel.from_pretrained("openai/clip-vit-base-patch32") - >>> tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32") + >>> _tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32") - >>> inputs = tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="pt") + >>> inputs = _tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="pt") >>> outputs = model(**inputs) >>> last_hidden_state = outputs.last_hidden_state @@ -945,9 +945,9 @@ def get_text_features( >>> from transformers import AutoTokenizer, CLIPModel >>> model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32") - >>> tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32") + >>> _tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32") - >>> inputs = tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="pt") + >>> inputs = _tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="pt") >>> text_features = model.get_text_features(**inputs) ```""" # Use CLIP model's config for some fields (if specified) instead of those of vision & text components. diff --git a/src/marqo/s2_inference/languagebind/video/tokenization_video.py b/src/marqo/s2_inference/languagebind/video/tokenization_video.py index 2c1dee12e..27c1ee555 100644 --- a/src/marqo/s2_inference/languagebind/video/tokenization_video.py +++ b/src/marqo/s2_inference/languagebind/video/tokenization_video.py @@ -28,9 +28,9 @@ class LanguageBindVideoTokenizer(CLIPTokenizer): """ - Construct a CLIP tokenizer. Based on byte-level Byte-Pair-Encoding. + Construct a CLIP _tokenizer. Based on byte-level Byte-Pair-Encoding. - This tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods. Users should refer to + This _tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods. Users should refer to this superclass for more information regarding those methods. 
Args: diff --git a/src/marqo/s2_inference/model_registry.py b/src/marqo/s2_inference/model_registry.py index e5e27bac5..d53ff888f 100644 --- a/src/marqo/s2_inference/model_registry.py +++ b/src/marqo/s2_inference/model_registry.py @@ -1,5 +1,6 @@ -from marqo.s2_inference.clip_utils import CLIP, OPEN_CLIP, MULTILINGUAL_CLIP, FP16_CLIP, \ +from marqo.s2_inference.clip_utils import CLIP, MULTILINGUAL_CLIP, FP16_CLIP, \ get_multilingual_clip_properties +from marqo.core.inference.models.open_clip_model import OPEN_CLIP from marqo.core.inference.models.hugging_face_model import HuggingFaceModel from marqo.s2_inference.onnx_clip_utils import CLIP_ONNX from marqo.s2_inference.random_utils import Random diff --git a/src/marqo/s2_inference/multimodal_model_load.py b/src/marqo/s2_inference/multimodal_model_load.py index 5fc9d03a4..4dd6dec99 100644 --- a/src/marqo/s2_inference/multimodal_model_load.py +++ b/src/marqo/s2_inference/multimodal_model_load.py @@ -99,7 +99,7 @@ def _load_languagebind_model(self): 'video': 'LanguageBind_Video_V1.5_FT', } else: - raise ValueError(f"Unsupported LanguageBind model: {self.model_name}") + raise ValueError(f"Unsupported LanguageBind _model: {self.model_name}") model = LanguageBind(clip_type=self.clip_type, cache_dir=ModelCache.languagebind_cache_path).to(self.device) model.eval() return model diff --git a/src/marqo/s2_inference/processing/custom_clip_utils.py b/src/marqo/s2_inference/processing/custom_clip_utils.py index 37428c69b..4287ca861 100644 --- a/src/marqo/s2_inference/processing/custom_clip_utils.py +++ b/src/marqo/s2_inference/processing/custom_clip_utils.py @@ -1,169 +1 @@ -import regex as re -from typing import Union, List, Optional -import ftfy -import html -import os -import urllib -from tqdm import tqdm -import torch -from urllib.error import HTTPError -from marqo.s2_inference.configs import ModelCache -from marqo.s2_inference.errors import ModelDownloadError, InvalidModelPropertiesError -from marqo.tensor_search.models.private_models import ModelAuth, ModelLocation -from marqo.s2_inference.model_downloading.from_s3 import ( - get_presigned_s3_url, get_s3_model_cache_filename, check_s3_model_already_exists, - get_s3_model_absolute_cache_path -) -from marqo.s2_inference.model_downloading.from_hf import download_model_from_hf -from marqo.tensor_search.models.external_apis.s3 import S3Auth, S3Location - - -def whitespace_clean(text): - text = re.sub(r'\s+', ' ', text) - text = text.strip() - return text - -def basic_clean(text): - text = ftfy.fix_text(text) - text = html.unescape(html.unescape(text)) - return text.strip() - - -class HFTokenizer: - # HuggingFace tokenizer wrapper - # Check https://github.com/mlfoundations/open_clip/blob/16e229c596cafaec46a4defaf27e0e30ffcca12d/src/open_clip/tokenizer.py#L188-L201 - def __init__(self, tokenizer_name: str): - from transformers import AutoTokenizer - self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) - - def __call__(self, texts: Union[str, List[str]]) -> torch.Tensor: - # same cleaning as for default tokenizer, except lowercasing - # adding lower (for case-sensitive tokenizers) will make it more robust but less sensitive to nuance - if isinstance(texts, str): - texts = [texts] - texts = [whitespace_clean(basic_clean(text)) for text in texts] - input_ids = self.tokenizer(texts, return_tensors='pt', padding='max_length', truncation=True).input_ids - return input_ids - - -def download_model( - repo_location: Optional[ModelLocation] = None, - url: Optional[str] = None, - auth: Optional[ModelAuth] 
= None, - download_dir: Optional[str] = None - ) -> str: - """Downloads a custom CLIP model. - - Args: - repo_location: object that contains information about the location of a - model. For example, s3 bucket and object path - url: location of a model specified by a URL - auth: object that contains information about authorisation required to - download a model. For example, s3 access keys - download_dir: The directory where the model should be downloaded. - - Returns: - The path of the downloaded model - """ - single_weight_location_validation_msg = ( - "only exactly one of parameters (repo_location, url) is allowed to be specified.") - if repo_location is None and url is None: - raise InvalidModelPropertiesError(single_weight_location_validation_msg) - if repo_location is not None and url is not None: - raise InvalidModelPropertiesError(single_weight_location_validation_msg) - - if url: - return download_pretrained_from_url(url=url, cache_dir=download_dir) - - if repo_location.s3: - download_kwargs = {'location': repo_location.s3, 'download_dir': download_dir} - if auth is not None: - download_kwargs['auth'] = auth.s3 - return download_pretrained_from_s3(**download_kwargs) - elif repo_location.hf: - download_kwargs = {'location': repo_location.hf, 'download_dir': download_dir} - if auth is not None: - download_kwargs['auth'] = auth.hf - return download_model_from_hf(**download_kwargs) - - - -def download_pretrained_from_s3( - location: S3Location, - auth: Optional[S3Auth] = None, - download_dir: Optional[str] = None) -> str: - """Downloads a pretrained model from S3, if it doesn't exist locally. The basename of the object's - key is used for the filename. - - Args: - location: Bucket and key of model file to be downloaded - auth: AWS IAM access keys to a user with access to the model to be downloaded - download_dir: the location where the model should be stored - - Returns: - Path to the downloaded model - """ - if check_s3_model_already_exists(location=location, download_dir=download_dir): - # TODO: check if abs path is even the most appropriate??? - return get_s3_model_absolute_cache_path(location=location, download_dir=download_dir) - - url = get_presigned_s3_url(location=location, auth=auth) - - try: - return download_pretrained_from_url( - url=url, cache_dir=download_dir, - cache_file_name=get_s3_model_cache_filename(location) - ) - except HTTPError as e: - if e.code == 403: - # TODO: add link to auth docs - raise ModelDownloadError( - "Received 403 error when trying to retrieve model from s3 storage. " - "Please check the request's s3 credentials and try again. " - ) from e - else: - raise e - -def download_pretrained_from_url( - url: str, - cache_dir: Union[str, None] = None, - cache_file_name: Optional[str] = None, -) -> str: - ''' - This function takes a clip model checkpoint url as input, downloads the model if it doesn't exist locally, - and returns the local path of the downloaded file. - - Args: - url: a valid string of the url address. - cache_dir: the directory to store the file - cache_file_name: name of the model file when it gets downloaded to the cache. - If not provided, the basename of the URL is used. - Returns: - download_target: the local path of the downloaded file. 
- ''' - buffer_size = 8192 - if not cache_dir: - cache_dir = os.path.expanduser(ModelCache.clip_cache_path) - os.makedirs(cache_dir, exist_ok=True) - - if cache_file_name is None: - filename = os.path.basename(url) - else: - filename = cache_file_name - - download_target = os.path.join(cache_dir, filename) - - if os.path.isfile(download_target): - return download_target - - with urllib.request.urlopen(url) as source, open(download_target, "wb") as output: - with tqdm(total=int(source.headers.get("Content-Length")), ncols=80, unit='iB', unit_scale=True) as loop: - while True: - buffer = source.read(buffer_size) - if not buffer: - break - - output.write(buffer) - loop.update(len(buffer)) - - return download_target +# \ No newline at end of file diff --git a/src/marqo/s2_inference/processing/yolox_utils.py b/src/marqo/s2_inference/processing/yolox_utils.py index 132560ba7..33d46fbfd 100644 --- a/src/marqo/s2_inference/processing/yolox_utils.py +++ b/src/marqo/s2_inference/processing/yolox_utils.py @@ -89,7 +89,7 @@ def demo_postprocess(outputs: ndarray, img_size: Tuple[int, int], p6: bool = Fal Args: outputs (ndarray): the outputs from the yolox model inference img_size (Tuple[int, int]): the size of the input image - p6 (bool, optional): model architecture parameter. marqo-yolo v1 and v2 should be False. + p6 (bool, optional): model architecture parameter. marqo-yolo v1 and v2 should be False. check the model architecture for anything else. Defaults to False. Returns: diff --git a/src/marqo/s2_inference/random_utils.py b/src/marqo/s2_inference/random_utils.py index 696fddb33..34174c803 100644 --- a/src/marqo/s2_inference/random_utils.py +++ b/src/marqo/s2_inference/random_utils.py @@ -1,4 +1,4 @@ -# implements a 'model' that returns a random vector, irrespective of +# implements a 'model' that returns a random vector, irrespective of # input. 
does not require a model and is completely random
# used for testing purposes
import functools
diff --git a/src/marqo/s2_inference/reranking/model_utils.py b/src/marqo/s2_inference/reranking/model_utils.py
index 1fc7d9c89..8d4778f08 100644
--- a/src/marqo/s2_inference/reranking/model_utils.py
+++ b/src/marqo/s2_inference/reranking/model_utils.py
@@ -142,7 +142,7 @@ def predict(self, inputs: Iterable):
return np.random.rand(len(inputs))
class HFClassificationOnnx:
- """uses HF pipelines and optimum to load hf classification model
+ """uses HF pipelines and optimum to load hf classification model
(cross encoders) and uses it as onnx
https://huggingface.co/docs/optimum/main/en/onnxruntime/modeling_ort
@@ -229,7 +229,7 @@ def predict(self, inputs: List[Dict]) -> List[Dict]:
List[Dict]: _description_
"""
self.inputs = self._prepare_inputs(inputs)
- # couldn't find aaaaany documentation on passing tokenizer arguments through the pipeline
+ # couldn't find aaaaany documentation on passing tokenizer arguments through the pipeline
# leaving these here for reference
# https://github.com/huggingface/transformers/blob/main/src/transformers/pipelines/__init__.py#L750
# https://stackoverflow.com/questions/67849833/how-to-truncate-input-in-the-huggingface-pipeline
@@ -276,7 +276,7 @@ def load_sbert_cross_encoder_model(model_name: str, device: str, max_length: int
def load_hf_cross_encoder_model(model_name: str, device: str) -> Dict:
"""
- features = tokenizer(['How many people live in Berlin?', 'How many people live in Berlin?'], ['Berlin has a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers.', 'New York City is famous for the Metropolitan Museum of Art.'], padding=True, truncation=True, return_tensors="pt")
+ features = tokenizer(['How many people live in Berlin?', 'How many people live in Berlin?'], ['Berlin has a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers.', 'New York City is famous for the Metropolitan Museum of Art.'], padding=True, truncation=True, return_tensors="pt")
with torch.no_grad():
scores = model(**features).logits
@@ -300,7 +300,7 @@ def load_hf_cross_encoder_model(model_name: str, device: str) -> Dict:
model.eval()
- return {'model':model, 'tokenizer':tokenizer}
+ return {'model':model, 'tokenizer':tokenizer}
def load_owl_vit(model_name: str, device: str) -> Dict:
"""loader for owl vit for image reranking
diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py
index 14af1f370..89f6bd3cd 100644
--- a/src/marqo/s2_inference/s2_inference.py
+++ b/src/marqo/s2_inference/s2_inference.py
@@ -15,7 +15,8 @@
from marqo import marqo_docs
from marqo.api.exceptions import ModelCacheManagementError, ConfigurationError, InternalError
from marqo.s2_inference import constants
-from marqo.s2_inference.clip_utils import CLIP, OPEN_CLIP
+from marqo.core.inference.models.open_clip_model import OPEN_CLIP
+from marqo.s2_inference.clip_utils import CLIP
from marqo.s2_inference.configs import get_default_normalization, get_default_seq_length
from marqo.s2_inference.errors import (
VectoriseError, InvalidModelPropertiesError, ModelLoadError,
diff --git a/tests/core/inference/test_hugging_face_model.py b/tests/core/inference/test_hugging_face_model.py
index d674160a5..46b2ecbd1 100644
--- a/tests/core/inference/test_hugging_face_model.py
+++ b/tests/core/inference/test_hugging_face_model.py
@@ -652,7 +652,7 @@ def test_hf_e5_base_v2_embeddings_load_from_zip_file(self):
@mock.patch("transformers.AutoModel.from_pretrained", return_value=mock.MagicMock()) @mock.patch("transformers.AutoTokenizer.from_pretrained", side_effect=OSError("Tokenizer load failed")) def test_tokenizer_loading_failure(self, mock_auto_model, mock_auto_tokenizer): - """Test that an error is raised when the tokenizer fails to load.""" + """Test that an error is raised when the _tokenizer fails to load.""" model_properties = { "name": "test-model", "type": "hf", diff --git a/tests/s2_inference/open_clip_models/test_marqo_fashion_clip.py b/tests/s2_inference/open_clip_models/test_marqo_fashion_clip.py index ead7e245b..b45e72f0d 100644 --- a/tests/s2_inference/open_clip_models/test_marqo_fashion_clip.py +++ b/tests/s2_inference/open_clip_models/test_marqo_fashion_clip.py @@ -2,7 +2,7 @@ import numpy as np -from marqo.s2_inference.clip_utils import OPEN_CLIP +from marqo.core.inference.models.open_clip_model import OPEN_CLIP from marqo.s2_inference.model_registry import _get_open_clip_properties from marqo.s2_inference.s2_inference import clear_loaded_models diff --git a/tests/s2_inference/open_clip_models/test_open_clip_model_load.py b/tests/s2_inference/open_clip_models/test_open_clip_model_load.py index 5f64bf41d..870473f58 100644 --- a/tests/s2_inference/open_clip_models/test_open_clip_model_load.py +++ b/tests/s2_inference/open_clip_models/test_open_clip_model_load.py @@ -1,7 +1,7 @@ from unittest import TestCase from unittest.mock import patch, MagicMock -from marqo.s2_inference.clip_utils import OPEN_CLIP +from marqo.core.inference.models.open_clip_model import OPEN_CLIP from marqo.s2_inference.configs import ModelCache from marqo.s2_inference.model_registry import _get_open_clip_properties @@ -27,9 +27,9 @@ def test_load_OpenCLIPModelFromCheckPointMethod_success(self): "type": "open_clip" } - with patch("marqo.s2_inference.clip_utils.OPEN_CLIP._load_model_and_image_preprocessor_from_checkpoint", \ + with patch("marqo.core.inference.models.open_clip_model import OPEN_CLIP._load_model_and_image_preprocessor_from_checkpoint", \ return_value=(MagicMock(), MagicMock())) as mock_load_method: - with patch("marqo.s2_inference.clip_utils.OPEN_CLIP._load_tokenizer_from_checkpoint", + with patch("marqo.core.inference.models.open_clip_model import OPEN_CLIP._load_tokenizer_from_checkpoint", return_value=MagicMock()) as mock_load_tokenizer: with patch.object(MagicMock(), 'eval', return_value=None) as mock_eval: model = OPEN_CLIP(model_properties=model_properties, device="cpu") diff --git a/tests/s2_inference/test_clip_utils.py b/tests/s2_inference/test_clip_utils.py index b7325732e..556bc092e 100644 --- a/tests/s2_inference/test_clip_utils.py +++ b/tests/s2_inference/test_clip_utils.py @@ -8,7 +8,8 @@ from marqo.api.exceptions import InternalError from marqo.s2_inference import clip_utils, types -from marqo.s2_inference.clip_utils import CLIP, OPEN_CLIP, FP16_CLIP, MULTILINGUAL_CLIP +from marqo.s2_inference.clip_utils import CLIP, FP16_CLIP, MULTILINGUAL_CLIP +from marqo.core.inference.models.open_clip_model import OPEN_CLIP from marqo.s2_inference.configs import ModelCache from marqo.s2_inference.errors import ImageDownloadError from marqo.tensor_search.enums import ModelProperties diff --git a/tests/tensor_search/test_model_auth.py b/tests/tensor_search/test_model_auth.py index d46f46496..ccdcf2c5e 100644 --- a/tests/tensor_search/test_model_auth.py +++ b/tests/tensor_search/test_model_auth.py @@ -1316,7 +1316,7 @@ def test_2_load_model_from_hf_zip_file_without_auth_search(self): assert 
len(mock_automodel_from_pretrained.call_args_list) == 1
assert mock_automodel_from_pretrained.call_args_list[0][0][0] == 'cache/path/to/model/', "Expected call not found"
- # is the hf tokenizer being loaded with the expected args?
+ # is the hf tokenizer being loaded with the expected args?
assert len(mock_autotokenizer_from_pretrained.call_args_list) == 1
assert mock_autotokenizer_from_pretrained.call_args_list[0][0][0] == 'cache/path/to/model/', "Expected call not found"
diff --git a/tests/tensor_search/test_on_start_script.py b/tests/tensor_search/test_on_start_script.py
index d9a086d5d..261dbd3b1 100644
--- a/tests/tensor_search/test_on_start_script.py
+++ b/tests/tensor_search/test_on_start_script.py
@@ -60,7 +60,7 @@ def test_preload_url_models(self):
}
clip_model_expected = (
- "generic-clip-test-model-2",
+ "generic-clip-test-model-2",
"ViT-B/32",
512,
"clip",

From 3e9717f566ef272f6ac950ce81bf1dd4f480423e Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Mon, 7 Oct 2024 11:23:52 +1100
Subject: [PATCH 33/63] Fix tests

---
.../processing/custom_clip_utils.py | 1 -
.../test_corrupt_file_error_handling.py | 9 ++---
tests/s2_inference/test_custom_clip_utils.py | 2 +-
tests/s2_inference/test_generic_clip_model.py | 2 +-
tests/tensor_search/test_model_auth.py | 35 ++++++++++++-------
5 files changed, 29 insertions(+), 20 deletions(-)
delete mode 100644 src/marqo/s2_inference/processing/custom_clip_utils.py

diff --git a/src/marqo/s2_inference/processing/custom_clip_utils.py b/src/marqo/s2_inference/processing/custom_clip_utils.py
deleted file mode 100644
index 4287ca861..000000000
--- a/src/marqo/s2_inference/processing/custom_clip_utils.py
+++ /dev/null
@@ -1 +0,0 @@
-#
\ No newline at end of file
diff --git a/tests/s2_inference/model_downloading/test_corrupt_file_error_handling.py b/tests/s2_inference/model_downloading/test_corrupt_file_error_handling.py
index a1d8e9c27..df12280de 100644
--- a/tests/s2_inference/model_downloading/test_corrupt_file_error_handling.py
+++ b/tests/s2_inference/model_downloading/test_corrupt_file_error_handling.py
@@ -6,7 +6,7 @@
from marqo.s2_inference.configs import ModelCache
from marqo.s2_inference.errors import InvalidModelPropertiesError
-from marqo.core.inference.models.hugging_face_model import extract_huggingface_archive
+from marqo.core.inference.models.hugging_face_model import HuggingFaceModel
from marqo.s2_inference.s2_inference import _load_model
@@ -259,11 +259,12 @@ def test_directory(self):
patch("marqo.core.inference.models.hugging_face_model.download_model", return_value="/path/to/file.tar"), \
patch("transformers.AutoModel.from_pretrained") as mock_model, \
patch("transformers.AutoTokenizer.from_pretrained") as mock_tokenizer:
- self.assertEqual(extract_huggingface_archive('/path/to/directory'), '/path/to/directory')
+ self.assertEqual(HuggingFaceModel.extract_huggingface_archive('/path/to/directory'), '/path/to/directory')
def test_hf_repo_id(self):
with patch('os.path.isfile', return_value=False):
- self.assertEqual(extract_huggingface_archive('sentence-transformers/all-MiniLM-L6-v2'), 'sentence-transformers/all-MiniLM-L6-v2')
+ self.assertEqual(HuggingFaceModel.extract_huggingface_archive('sentence-transformers/all-MiniLM-L6-v2'),
+ 'sentence-transformers/all-MiniLM-L6-v2')
def test_extraction_failure(self):
with patch('os.path.isfile', return_value=True), \
patch('os.path.splitext', return_value=('/path/to/file', '.zip')), \
patch('zipfile.ZipFile', side_effect=zipfile.BadZipfile), \
patch('os.remove') as mock_remove:
with
self.assertRaises(InvalidModelPropertiesError):
- extract_huggingface_archive('/path/to/file.zip')
+ HuggingFaceModel.extract_huggingface_archive('/path/to/file.zip')
mock_remove.assert_called_once_with('/path/to/file.zip')
diff --git a/tests/s2_inference/test_custom_clip_utils.py b/tests/s2_inference/test_custom_clip_utils.py
index 28a5a96da..3c35dfa3d 100644
--- a/tests/s2_inference/test_custom_clip_utils.py
+++ b/tests/s2_inference/test_custom_clip_utils.py
@@ -1,7 +1,7 @@
import unittest
import urllib
from unittest.mock import patch, MagicMock
-from marqo.s2_inference.processing.custom_clip_utils import (
+from marqo.core.inference.model_download import (
download_pretrained_from_s3, download_model, download_pretrained_from_url,
ModelDownloadError, S3Auth, S3Location, ModelAuth, ModelLocation
)
diff --git a/tests/s2_inference/test_generic_clip_model.py b/tests/s2_inference/test_generic_clip_model.py
index 373f6d648..30e71f6c3 100644
--- a/tests/s2_inference/test_generic_clip_model.py
+++ b/tests/s2_inference/test_generic_clip_model.py
@@ -6,7 +6,7 @@
from marqo.api.exceptions import IndexNotFoundError
from marqo.s2_inference.errors import UnknownModelError, ModelLoadError
from marqo.tensor_search import tensor_search
-from marqo.s2_inference.processing.custom_clip_utils import download_pretrained_from_url
+from marqo.core.inference.model_download import download_pretrained_from_url
from marqo.s2_inference.s2_inference import clear_loaded_models
from marqo.s2_inference.s2_inference import (
vectorise,
diff --git a/tests/tensor_search/test_model_auth.py b/tests/tensor_search/test_model_auth.py
index ccdcf2c5e..692ebc92e 100644
--- a/tests/tensor_search/test_model_auth.py
+++ b/tests/tensor_search/test_model_auth.py
@@ -15,8 +15,8 @@
import unittest
from marqo.s2_inference.s2_inference import clear_loaded_models
from transformers import AutoModel, AutoTokenizer
-from marqo.s2_inference.processing.custom_clip_utils import download_pretrained_from_url
-from marqo.core.inference.models.hugging_face_model import extract_huggingface_archive
+from marqo.core.inference.model_download import download_pretrained_from_url
+from marqo.core.inference.models.hugging_face_model import HuggingFaceModel
import os
from marqo.api.exceptions import BadRequestError, ModelNotInCacheError
from marqo.tensor_search.models.api_models import BulkSearchQuery, BulkSearchQueryEntity
@@ -1237,7 +1237,8 @@ def test_1_load_model_from_hf_zip_file_with_auth_search(self):
with unittest.mock.patch('transformers.AutoModel.from_pretrained', mock_automodel_from_pretrained):
with unittest.mock.patch('transformers.AutoTokenizer.from_pretrained', mock_autotokenizer_from_pretrained):
with unittest.mock.patch('marqo.s2_inference.model_downloading.from_hf.hf_hub_download', mock_hf_hub_download):
- with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.extract_huggingface_archive", mock_extract_huggingface_archive):
+ with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.HuggingFaceModel."
+ "extract_huggingface_archive", mock_extract_huggingface_archive): try: res = tensor_search.search( config=self.config, text='hello', index_name=self.index_name_1, @@ -1297,7 +1298,8 @@ def test_2_load_model_from_hf_zip_file_without_auth_search(self): with unittest.mock.patch('transformers.AutoModel.from_pretrained', mock_automodel_from_pretrained): with unittest.mock.patch('transformers.AutoTokenizer.from_pretrained', mock_autotokenizer_from_pretrained): with unittest.mock.patch('marqo.s2_inference.model_downloading.from_hf.hf_hub_download', mock_hf_hub_download): - with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.extract_huggingface_archive", mock_extract_huggingface_archive): + with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.HuggingFaceModel." + "extract_huggingface_archive", mock_extract_huggingface_archive): try: res = tensor_search.search( config=self.config, text='hello', index_name=self.index_name_1,) @@ -1368,7 +1370,8 @@ def test_3_load_model_from_s3_zip_file_with_auth_search(self): with unittest.mock.patch('transformers.AutoTokenizer.from_pretrained',mock_autotokenizer_from_pretrained): with unittest.mock.patch('boto3.client', return_value=mock_s3_client) as mock_boto3_client: with unittest.mock.patch("marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_url", mock_download_pretrained_from_url): - with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.extract_huggingface_archive", mock_extract_huggingface_archive): + with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.HuggingFaceModel." + "extract_huggingface_archive", mock_extract_huggingface_archive): try: res = tensor_search.search( config=self.config, text='hello', index_name=self.index_name_1, @@ -1416,13 +1419,14 @@ def test_4_load_model_from_public_url_zip_file_search(self): tensor_search.create_vector_index(config=self.config, index_name=self.index_name_1, index_settings=s3_settings) - mock_extract_huggingface_archive = mock.MagicMock(side_effect=extract_huggingface_archive) + mock_extract_huggingface_archive = mock.MagicMock(side_effect=HuggingFaceModel.extract_huggingface_archive) mock_automodel_from_pretrained = mock.MagicMock(side_effect=AutoModel.from_pretrained) mock_download = mock.MagicMock(side_effect=download_pretrained_from_url) with mock.patch('transformers.AutoModel.from_pretrained', new=mock_automodel_from_pretrained): with mock.patch('marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_url', new=mock_download): - with mock.patch("marqo.core.inference.models.hugging_face_model.extract_huggingface_archive", new=mock_extract_huggingface_archive): + with mock.patch("marqo.core.inference.models.hugging_face_model.HuggingFaceModel." 
+ "extract_huggingface_archive", new=mock_extract_huggingface_archive): res = tensor_search.search(config=self.config, text='hello', index_name=self.index_name_1) assert len(mock_extract_huggingface_archive.call_args_list) == 1 @@ -1568,7 +1572,8 @@ def test_1_load_model_from_hf_zip_file_with_auth_add_documents(self): with unittest.mock.patch('transformers.AutoModel.from_pretrained', mock_automodel_from_pretrained): with unittest.mock.patch('transformers.AutoTokenizer.from_pretrained', mock_autotokenizer_from_pretrained): with unittest.mock.patch('marqo.s2_inference.model_downloading.from_hf.hf_hub_download', mock_hf_hub_download): - with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.extract_huggingface_archive", mock_extract_huggingface_archive): + with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.HuggingFaceModel." + "extract_huggingface_archive", mock_extract_huggingface_archive): try: tensor_search.add_documents(config=self.config, add_docs_params=AddDocsParams( index_name=self.index_name_1, auto_refresh=True, docs=[{'a': 'b'}], @@ -1628,7 +1633,8 @@ def test_2_load_model_from_hf_zip_file_without_auth_add_documents(self): with unittest.mock.patch('transformers.AutoModel.from_pretrained', mock_automodel_from_pretrained): with unittest.mock.patch('transformers.AutoTokenizer.from_pretrained', mock_autotokenizer_from_pretrained): with unittest.mock.patch('marqo.s2_inference.model_downloading.from_hf.hf_hub_download', mock_hf_hub_download): - with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.extract_huggingface_archive", mock_extract_huggingface_archive): + with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.HuggingFaceModel." + "extract_huggingface_archive", mock_extract_huggingface_archive): try: tensor_search.add_documents(config=self.config, add_docs_params=AddDocsParams( index_name=self.index_name_1, auto_refresh=True, docs=[{'a': 'b'}], device="cpu")) @@ -1699,7 +1705,8 @@ def test_3_load_model_from_s3_zip_file_with_auth_add_documents(self): with unittest.mock.patch( "marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_url", mock_download_pretrained_from_url): - with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.extract_huggingface_archive", + with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.HuggingFaceModel." 
+ "extract_huggingface_archive", mock_extract_huggingface_archive): try: tensor_search.add_documents(config=self.config, add_docs_params=AddDocsParams( @@ -1750,13 +1757,14 @@ def test_4_load_model_from_public_url_zip_file_add_documents(self): tensor_search.create_vector_index(config=self.config, index_name=self.index_name_1, index_settings=s3_settings) - mock_extract_huggingface_archive = mock.MagicMock(side_effect=extract_huggingface_archive) + mock_extract_huggingface_archive = mock.MagicMock(side_effect=HuggingFaceModel.extract_huggingface_archive) mock_automodel_from_pretrained = mock.MagicMock(side_effect=AutoModel.from_pretrained) mock_download = mock.MagicMock(side_effect=download_pretrained_from_url) with mock.patch('transformers.AutoModel.from_pretrained', new=mock_automodel_from_pretrained): with mock.patch('marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_url', new=mock_download): - with mock.patch("marqo.core.inference.models.hugging_face_model.extract_huggingface_archive", new=mock_extract_huggingface_archive): + with mock.patch("marqo.core.inference.models.hugging_face_model.HuggingFaceModel." + "extract_huggingface_archive", new=mock_extract_huggingface_archive): tensor_search.add_documents(config=self.config, add_docs_params=AddDocsParams( index_name=self.index_name_1, auto_refresh=True, docs=[{'a': 'b'}], device="cpu")) @@ -2477,7 +2485,8 @@ def test_bulk_search(self): with unittest.mock.patch('transformers.AutoTokenizer.from_pretrained', mock_autotokenizer_from_pretrained): with unittest.mock.patch('boto3.client', return_value=mock_s3_client) as mock_boto3_client: with unittest.mock.patch("marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_url",mock_download_pretrained_from_url): - with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.extract_huggingface_archive", mock_extract_huggingface_archive): + with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.HuggingFaceModel." 
+ "extract_huggingface_archive", mock_extract_huggingface_archive): try: tensor_search.bulk_search( query=bulk_search_query, From 38aa7809bf51ac1d53be0366d54d1cee63453156 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 7 Oct 2024 11:40:24 +1100 Subject: [PATCH 34/63] Fix tests --- .../inference/download_model_from_hf.py} | 0 .../inference/download_model_from_s3.py} | 0 src/marqo/core/inference/model_download.py | 4 ++-- src/marqo/s2_inference/model_downloading/__init__.py | 0 .../inference}/test_corrupt_file_error_handling.py | 3 +-- .../inference/test_download_mode_from_hf.py} | 4 ++-- .../inference/test_download_model.py} | 0 .../inference/test_download_model_from_s3.py} | 12 +++++++----- tests/tensor_search/test_model_auth.py | 5 ++--- tests/tensor_search/test_model_auth_cuda.py | 7 +++---- 10 files changed, 17 insertions(+), 18 deletions(-) rename src/marqo/{s2_inference/model_downloading/from_hf.py => core/inference/download_model_from_hf.py} (100%) rename src/marqo/{s2_inference/model_downloading/from_s3.py => core/inference/download_model_from_s3.py} (100%) delete mode 100644 src/marqo/s2_inference/model_downloading/__init__.py rename tests/{s2_inference/model_downloading => core/inference}/test_corrupt_file_error_handling.py (99%) rename tests/{s2_inference/model_downloading/test_from_hf.py => core/inference/test_download_mode_from_hf.py} (97%) rename tests/{s2_inference/test_custom_clip_utils.py => core/inference/test_download_model.py} (100%) rename tests/{s2_inference/model_downloading/test_from_s3.py => core/inference/test_download_model_from_s3.py} (98%) diff --git a/src/marqo/s2_inference/model_downloading/from_hf.py b/src/marqo/core/inference/download_model_from_hf.py similarity index 100% rename from src/marqo/s2_inference/model_downloading/from_hf.py rename to src/marqo/core/inference/download_model_from_hf.py diff --git a/src/marqo/s2_inference/model_downloading/from_s3.py b/src/marqo/core/inference/download_model_from_s3.py similarity index 100% rename from src/marqo/s2_inference/model_downloading/from_s3.py rename to src/marqo/core/inference/download_model_from_s3.py diff --git a/src/marqo/core/inference/model_download.py b/src/marqo/core/inference/model_download.py index a25ec4ae6..0360611af 100644 --- a/src/marqo/core/inference/model_download.py +++ b/src/marqo/core/inference/model_download.py @@ -7,8 +7,8 @@ from marqo.s2_inference.configs import ModelCache from marqo.s2_inference.errors import ModelDownloadError, InvalidModelPropertiesError -from marqo.s2_inference.model_downloading.from_hf import download_model_from_hf -from marqo.s2_inference.model_downloading.from_s3 import ( +from marqo.core.inference.download_model_from_hf import download_model_from_hf +from marqo.core.inference.download_model_from_s3 import ( get_presigned_s3_url, get_s3_model_cache_filename, check_s3_model_already_exists, get_s3_model_absolute_cache_path ) diff --git a/src/marqo/s2_inference/model_downloading/__init__.py b/src/marqo/s2_inference/model_downloading/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/s2_inference/model_downloading/test_corrupt_file_error_handling.py b/tests/core/inference/test_corrupt_file_error_handling.py similarity index 99% rename from tests/s2_inference/model_downloading/test_corrupt_file_error_handling.py rename to tests/core/inference/test_corrupt_file_error_handling.py index df12280de..12e598099 100644 --- a/tests/s2_inference/model_downloading/test_corrupt_file_error_handling.py +++ 
b/tests/core/inference/test_corrupt_file_error_handling.py @@ -4,9 +4,8 @@ import pytest -from marqo.s2_inference.configs import ModelCache -from marqo.s2_inference.errors import InvalidModelPropertiesError from marqo.core.inference.models.hugging_face_model import HuggingFaceModel +from marqo.s2_inference.errors import InvalidModelPropertiesError from marqo.s2_inference.s2_inference import _load_model diff --git a/tests/s2_inference/model_downloading/test_from_hf.py b/tests/core/inference/test_download_mode_from_hf.py similarity index 97% rename from tests/s2_inference/model_downloading/test_from_hf.py rename to tests/core/inference/test_download_mode_from_hf.py index 02e02cda5..9eb9922f6 100644 --- a/tests/s2_inference/model_downloading/test_from_hf.py +++ b/tests/core/inference/test_download_mode_from_hf.py @@ -1,8 +1,8 @@ import unittest -from unittest.mock import MagicMock, patch +from unittest.mock import patch from marqo.s2_inference.errors import ModelDownloadError from marqo.tensor_search.models.external_apis.hf import HfAuth, HfModelLocation -from marqo.s2_inference.model_downloading.from_hf import download_model_from_hf +from marqo.core.inference.download_model_from_hf import download_model_from_hf from huggingface_hub.errors import RepositoryNotFoundError from marqo.s2_inference.configs import ModelCache diff --git a/tests/s2_inference/test_custom_clip_utils.py b/tests/core/inference/test_download_model.py similarity index 100% rename from tests/s2_inference/test_custom_clip_utils.py rename to tests/core/inference/test_download_model.py diff --git a/tests/s2_inference/model_downloading/test_from_s3.py b/tests/core/inference/test_download_model_from_s3.py similarity index 98% rename from tests/s2_inference/model_downloading/test_from_s3.py rename to tests/core/inference/test_download_model_from_s3.py index 161bcb93a..214d42e91 100644 --- a/tests/s2_inference/model_downloading/test_from_s3.py +++ b/tests/core/inference/test_download_model_from_s3.py @@ -1,14 +1,16 @@ -from marqo.s2_inference.model_downloading.from_s3 import ( +import unittest +from unittest.mock import patch + +import botocore +from botocore.exceptions import NoCredentialsError + +from marqo.core.inference.download_model_from_s3 import ( get_presigned_s3_url, get_s3_model_absolute_cache_path, check_s3_model_already_exists, get_s3_model_cache_filename, ) -from botocore.exceptions import NoCredentialsError from marqo.s2_inference.configs import ModelCache -import unittest -import botocore -from unittest.mock import patch from marqo.s2_inference.errors import ModelDownloadError from marqo.tensor_search.models.external_apis.s3 import S3Auth, S3Location diff --git a/tests/tensor_search/test_model_auth.py b/tests/tensor_search/test_model_auth.py index 692ebc92e..516a49663 100644 --- a/tests/tensor_search/test_model_auth.py +++ b/tests/tensor_search/test_model_auth.py @@ -7,9 +7,9 @@ from marqo.tensor_search import tensor_search from marqo.tensor_search.models.add_docs_objects import AddDocsParams from marqo.tensor_search.models.private_models import S3Auth, ModelAuth, HfAuth -from marqo.api.exceptions import InvalidArgError, IndexNotFoundError, BadRequestError +from marqo.api.exceptions import InvalidArgError, IndexNotFoundError from tests.marqo_test import MarqoTestCase, TestImageUrls -from marqo.s2_inference.model_downloading.from_s3 import get_s3_model_absolute_cache_path +from marqo.core.inference.download_model_from_s3 import get_s3_model_absolute_cache_path from marqo.tensor_search.models.external_apis.s3 import 
S3Location from unittest import mock import unittest @@ -22,7 +22,6 @@ from marqo.tensor_search.models.api_models import BulkSearchQuery, BulkSearchQueryEntity from marqo.s2_inference.configs import ModelCache import shutil -from marqo.tensor_search.models.external_apis.hf import HfModelLocation from marqo.tensor_search.models.private_models import ModelLocation from pydantic.error_wrappers import ValidationError diff --git a/tests/tensor_search/test_model_auth_cuda.py b/tests/tensor_search/test_model_auth_cuda.py index 65856f272..189490a9a 100644 --- a/tests/tensor_search/test_model_auth_cuda.py +++ b/tests/tensor_search/test_model_auth_cuda.py @@ -4,10 +4,10 @@ """ from marqo.tensor_search import tensor_search from marqo.tensor_search.models.add_docs_objects import AddDocsParams -from marqo.tensor_search.models.private_models import S3Auth, ModelAuth, HfAuth -from marqo.api.exceptions import InvalidArgError, IndexNotFoundError, BadRequestError +from marqo.tensor_search.models.private_models import S3Auth, ModelAuth +from marqo.api.exceptions import IndexNotFoundError from tests.marqo_test import MarqoTestCase -from marqo.s2_inference.model_downloading.from_s3 import get_s3_model_absolute_cache_path +from marqo.core.inference.download_model_from_s3 import get_s3_model_absolute_cache_path from marqo.tensor_search.models.external_apis.s3 import S3Location from unittest import mock from tests.tensor_search.test_model_auth import _delete_file, _get_base_index_settings @@ -15,7 +15,6 @@ import os import torch import pytest -from marqo.api.exceptions import BadRequestError @pytest.mark.largemodel From 878ace58ada75868e1b7c135661a8629cf08aaf7 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 7 Oct 2024 11:46:26 +1100 Subject: [PATCH 35/63] Fix tests --- src/marqo/core/inference/model_download.py | 25 ++++++++++++---------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/marqo/core/inference/model_download.py b/src/marqo/core/inference/model_download.py index 0360611af..4756e38e9 100644 --- a/src/marqo/core/inference/model_download.py +++ b/src/marqo/core/inference/model_download.py @@ -5,6 +5,7 @@ from tqdm import tqdm +from marqo.core.exceptions import InternalError from marqo.s2_inference.configs import ModelCache from marqo.s2_inference.errors import ModelDownloadError, InvalidModelPropertiesError from marqo.core.inference.download_model_from_hf import download_model_from_hf @@ -45,17 +46,19 @@ def download_model( if url: return download_pretrained_from_url(url=url, cache_dir=download_dir) - - if repo_location.s3: - download_kwargs = {'location': repo_location.s3, 'download_dir': download_dir} - if auth is not None: - download_kwargs['auth'] = auth.s3 - return download_pretrained_from_s3(**download_kwargs) - elif repo_location.hf: - download_kwargs = {'location': repo_location.hf, 'download_dir': download_dir} - if auth is not None: - download_kwargs['auth'] = auth.hf - return download_model_from_hf(**download_kwargs) + if isinstance(repo_location, ModelLocation): + if repo_location.s3: + download_kwargs = {'location': repo_location.s3, 'download_dir': download_dir} + if auth is not None: + download_kwargs['auth'] = auth.s3 + return download_pretrained_from_s3(**download_kwargs) + elif repo_location.hf: + download_kwargs = {'location': repo_location.hf, 'download_dir': download_dir} + if auth is not None: + download_kwargs['auth'] = auth.hf + return download_model_from_hf(**download_kwargs) + else: + raise InternalError("Invalid model location object provided.") def 
download_pretrained_from_s3( From bc354ae9ae4c3d8ba6e30c82d907c120a99e6db0 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 7 Oct 2024 13:11:19 +1100 Subject: [PATCH 36/63] Fix tests --- .../test_corrupt_file_error_handling.py | 18 ++++---- .../inference/test_download_mode_from_hf.py | 20 ++++----- tests/core/inference/test_download_model.py | 42 +++++++++---------- 3 files changed, 42 insertions(+), 38 deletions(-) diff --git a/tests/core/inference/test_corrupt_file_error_handling.py b/tests/core/inference/test_corrupt_file_error_handling.py index 12e598099..48b555662 100644 --- a/tests/core/inference/test_corrupt_file_error_handling.py +++ b/tests/core/inference/test_corrupt_file_error_handling.py @@ -59,17 +59,18 @@ def setUp(self): @patch('open_clip.create_model', autospec=True) @patch('os.remove', autospec=True) def test_corrupted_file_handling(self, mock_os_remove, mock_create_model_and_transforms): - # Setup + """Ensure that a proper error is raised when a corrupted file is encountered. The file should be removed.""" mock_create_model_and_transforms.side_effect = RuntimeError("The file might be corrupted") - with patch("marqo.s2_inference.clip_utils.download_model", return_value = self.dummpy_corrupted_file): - for model_properties in self.dummpy_model_properties: + for model_properties in self.dummpy_model_properties: + with patch("marqo.core.inference.models.open_clip_model.download_model", + return_value = self.dummpy_corrupted_file): with self.assertRaises(InvalidModelPropertiesError) as context: _ = _load_model(**self.load_parameters, model_properties=model_properties) # Verify self.assertIn("Marqo encountered a corrupted file when loading open_clip file", str(context.exception)) mock_os_remove.assert_called_once_with(self.dummpy_corrupted_file) - # Reset necessary mock + # Reset the mock mock_os_remove.reset_mock() @patch('open_clip.create_model', autospec=True) @@ -78,7 +79,8 @@ def test_file_removal_failure_handling(self, mock_os_remove, mock_create_model_a # Setup mock_create_model_and_transforms.side_effect = RuntimeError("The file might be corrupted") mock_os_remove.side_effect = OSError("Permission denied") - with patch("marqo.s2_inference.clip_utils.download_model", return_value = self.dummpy_corrupted_file): + with patch("marqo.core.inference.models.open_clip_model.download_model", + return_value = self.dummpy_corrupted_file): for model_properties in self.dummpy_model_properties: # Execute and Verify with self.assertRaises(RuntimeError) as context: @@ -96,7 +98,8 @@ def test_file_removal_failure_handling(self, mock_os_remove, mock_create_model_a def test_other_errors_handling(self, mock_os_remove, mock_create_model_and_transforms): # Setup mock_create_model_and_transforms.side_effect = Exception("An error occurred") - with patch("marqo.s2_inference.clip_utils.download_model", return_value = self.dummpy_corrupted_file): + with patch("marqo.core.inference.models.open_clip_model.download_model", + return_value = self.dummpy_corrupted_file): for model_properties in self.dummpy_model_properties: # Execute and Verify with self.assertRaises(RuntimeError) as context: @@ -110,7 +113,8 @@ def test_load_clip_into_open_clip_errors_handling(self, mock_os_remove, mock_cre # Setup mock_create_model_and_transforms.side_effect = Exception( "This could be because the operator doesn't exist for this backend") - with patch("marqo.s2_inference.clip_utils.download_model", return_value = self.dummpy_corrupted_file): + with patch("marqo.core.inference.models.open_clip_model.download_model", + 
return_value = self.dummpy_corrupted_file): for model_properties in self.dummpy_model_properties: # Execute and Verify with self.assertRaises(InvalidModelPropertiesError) as context: diff --git a/tests/core/inference/test_download_mode_from_hf.py b/tests/core/inference/test_download_mode_from_hf.py index 9eb9922f6..a1e17aeb0 100644 --- a/tests/core/inference/test_download_mode_from_hf.py +++ b/tests/core/inference/test_download_mode_from_hf.py @@ -13,7 +13,7 @@ def setUp(self): self.hf_auth = HfAuth(token="test-token") def test_download_model_from_hf_success(self): - with patch("marqo.s2_inference.model_downloading.from_hf.hf_hub_download", + with patch("marqo.core.inference.download_model_from_hf.hf_hub_download", return_value="model_path") as hf_hub_download_mock: result = download_model_from_hf(self.hf_location, self.hf_auth) self.assertEqual(result, "model_path") @@ -21,49 +21,49 @@ def test_download_model_from_hf_success(self): def test_download_model_from_hf_no_auth(self): with patch( - "marqo.s2_inference.model_downloading.from_hf.hf_hub_download", + "marqo.core.inference.download_model_from_hf.hf_hub_download", return_value="model_path") as hf_hub_download_mock: result = download_model_from_hf(self.hf_location) self.assertEqual(result, "model_path") hf_hub_download_mock.assert_called_once_with(repo_id="test-repo-id", filename="test-filename", cache_dir=None) def test_download_model_from_hf_repository_not_found_error(self): - with patch("marqo.s2_inference.model_downloading.from_hf.hf_hub_download", + with patch("marqo.core.inference.download_model_from_hf.hf_hub_download", side_effect=RepositoryNotFoundError("repo not found")): with self.assertRaises(ModelDownloadError): download_model_from_hf(self.hf_location, self.hf_auth) def test_download_model_from_hf_invalid_location(self): invalid_location = HfModelLocation(repo_id="", filename="test-filename") - with patch("marqo.s2_inference.model_downloading.from_hf.hf_hub_download", + with patch("marqo.core.inference.download_model_from_hf.hf_hub_download", side_effect=RepositoryNotFoundError("repo not found")): with self.assertRaises(ModelDownloadError): download_model_from_hf(invalid_location, self.hf_auth) def test_download_model_from_hf_invalid_auth(self): invalid_auth = HfAuth(token="") - with patch("marqo.s2_inference.model_downloading.from_hf.hf_hub_download", + with patch("marqo.core.inference.download_model_from_hf.hf_hub_download", side_effect=RepositoryNotFoundError("repo not found")): with self.assertRaises(ModelDownloadError): download_model_from_hf(self.hf_location, invalid_auth) def test_download_model_from_hf_unexpected_error(self): - with patch("marqo.s2_inference.model_downloading.from_hf.hf_hub_download", + with patch("marqo.core.inference.download_model_from_hf.hf_hub_download", side_effect=Exception("Unexpected error")): with self.assertRaises(Exception): download_model_from_hf(self.hf_location, self.hf_auth) def test_download_model_from_hf_with_download_dir(self): - with patch("marqo.s2_inference.model_downloading.from_hf.hf_hub_download", + with patch("marqo.core.inference.download_model_from_hf.hf_hub_download", return_value="model_path") as hf_hub_download_mock: - with patch("marqo.s2_inference.model_downloading.from_hf.logger.warning") as logger_warning_mock: + with patch("marqo.core.inference.download_model_from_hf.logger.warning") as logger_warning_mock: result = download_model_from_hf(self.hf_location, self.hf_auth, download_dir="custom_download_dir") self.assertEqual(result, "model_path") 
hf_hub_download_mock.assert_called_once_with(repo_id="test-repo-id", filename="test-filename", token="test-token", cache_dir="custom_download_dir") def test_download_model_from_hf_no_auth_with_hf_dir(self): with patch( - "marqo.s2_inference.model_downloading.from_hf.hf_hub_download", + "marqo.core.inference.download_model_from_hf.hf_hub_download", return_value="model_path") as hf_hub_download_mock: result = download_model_from_hf(self.hf_location, download_dir=ModelCache.hf_cache_path) self.assertEqual(result, "model_path") @@ -72,7 +72,7 @@ def test_download_model_from_hf_no_auth_with_hf_dir(self): def test_download_model_from_hf_no_auth_with_openclip_dir(self): with patch( - "marqo.s2_inference.model_downloading.from_hf.hf_hub_download", + "marqo.core.inference.download_model_from_hf.hf_hub_download", return_value="model_path") as hf_hub_download_mock: result = download_model_from_hf(self.hf_location, download_dir=ModelCache.clip_cache_path) self.assertEqual(result, "model_path") diff --git a/tests/core/inference/test_download_model.py b/tests/core/inference/test_download_model.py index 3c35dfa3d..96364d487 100644 --- a/tests/core/inference/test_download_model.py +++ b/tests/core/inference/test_download_model.py @@ -18,7 +18,7 @@ def test_neither_location_nor_url_provided(self): with self.assertRaises(InvalidModelPropertiesError): download_model() - @patch("marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_s3") + @patch("marqo.core.inference.model_download.download_pretrained_from_s3") def test_download_from_s3(self, mock_download_s3): mock_download_s3.return_value = "/path/to/model.pt" repo_location = ModelLocation(s3=S3Location(Bucket="test_bucket", Key="test_key")) @@ -28,7 +28,7 @@ def test_download_from_s3(self, mock_download_s3): self.assertEqual(model_path, "/path/to/model.pt") mock_download_s3.assert_called_once_with(location=repo_location.s3, auth=auth.s3, download_dir=None) - @patch("marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_url") + @patch("marqo.core.inference.model_download.download_pretrained_from_url") def test_download_from_url(self, mock_download_url): mock_download_url.return_value = "/path/to/model.pt" url = "http://example.com/model.pt" @@ -43,13 +43,13 @@ def setUp(self): self.s3_location = S3Location(Bucket="test_bucket", Key="remote_path/test_key.pt") self.s3_auth = S3Auth(aws_access_key_id="test_access_key", aws_secret_access_key="test_secret_key") - @patch("marqo.s2_inference.processing.custom_clip_utils.check_s3_model_already_exists") + @patch("marqo.core.inference.model_download.check_s3_model_already_exists") def test_model_exists_locally(self, mock_check_s3_model): mock_check_s3_model.return_value = True - with patch("marqo.s2_inference.processing.custom_clip_utils.get_s3_model_absolute_cache_path" + with patch("marqo.core.inference.model_download.get_s3_model_absolute_cache_path" ) as mock_get_abs_path: - with patch("marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_url" + with patch("marqo.core.inference.model_download.download_pretrained_from_url" ) as mock_download_pretrained_from_url: mock_get_abs_path.return_value = "/path/to/model.pt" result = download_pretrained_from_s3(location=self.s3_location, auth=self.s3_auth) @@ -58,13 +58,13 @@ def test_model_exists_locally(self, mock_check_s3_model): mock_download_pretrained_from_url.assert_not_called() mock_check_s3_model.assert_called_once_with(location=self.s3_location, download_dir=None) - 
@patch("marqo.s2_inference.processing.custom_clip_utils.check_s3_model_already_exists") - @patch("marqo.s2_inference.processing.custom_clip_utils.get_presigned_s3_url") + @patch("marqo.core.inference.model_download.check_s3_model_already_exists") + @patch("marqo.core.inference.model_download.get_presigned_s3_url") def test_model_does_not_exist_locally(self, mock_get_presigned_url, mock_check_s3_model): mock_check_s3_model.return_value = False mock_get_presigned_url.return_value = "http://example.com/model.pt" - with patch("marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_url" + with patch("marqo.core.inference.model_download.download_pretrained_from_url" ) as mock_download_pretrained_from_url: mock_download_pretrained_from_url.return_value = "/path/to/model.pt" result = download_pretrained_from_s3(location=self.s3_location, auth=self.s3_auth) @@ -81,13 +81,13 @@ def test_model_does_not_exist_locally(self, mock_get_presigned_url, mock_check_s cache_file_name='test_key.pt' ) - @patch("marqo.s2_inference.processing.custom_clip_utils.check_s3_model_already_exists") - @patch("marqo.s2_inference.processing.custom_clip_utils.get_presigned_s3_url") + @patch("marqo.core.inference.model_download.check_s3_model_already_exists") + @patch("marqo.core.inference.model_download.get_presigned_s3_url") def test_model_download_raises_403_error(self, mock_get_presigned_url, mock_check_s3_model): mock_check_s3_model.return_value = False mock_get_presigned_url.return_value = "http://example.com/model.pt" - with patch("marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_url") as mock_download_url: + with patch("marqo.core.inference.model_download.download_pretrained_from_url") as mock_download_url: mock_download_url.side_effect = urllib.error.HTTPError(url=None, code=403, msg=None, hdrs=None, fp=None) with self.assertRaises(ModelDownloadError): @@ -102,8 +102,8 @@ def setUp(self): def test_file_exists_locally(self, mock_isfile, mock_urlopen): mock_isfile.return_value = True with patch("builtins.open", unittest.mock.mock_open()) as mock_open: - with patch("marqo.s2_inference.processing.custom_clip_utils.tqdm") as mock_tqdm: - with patch("marqo.s2_inference.processing.custom_clip_utils.ModelCache") as mock_cache: + with patch("marqo.core.inference.model_download.tqdm") as mock_tqdm: + with patch("marqo.core.inference.model_download.ModelCache") as mock_cache: with tempfile.TemporaryDirectory() as temp_cache_dir: mock_cache.clip_cache_path = temp_cache_dir result = download_pretrained_from_url(self.url) @@ -122,8 +122,8 @@ def test_file_does_not_exist_locally(self, mock_urlopen, mock_isfile): mock_urlopen.return_value.__enter__.return_value = mock_source with patch("builtins.open", unittest.mock.mock_open()) as mock_open: - with patch("marqo.s2_inference.processing.custom_clip_utils.tqdm") as mock_tqdm: - with patch("marqo.s2_inference.processing.custom_clip_utils.ModelCache") as mock_cache: + with patch("marqo.core.inference.model_download.tqdm") as mock_tqdm: + with patch("marqo.core.inference.model_download.ModelCache") as mock_cache: with tempfile.TemporaryDirectory() as temp_cache_dir: mock_cache.clip_cache_path = temp_cache_dir result = download_pretrained_from_url(self.url) @@ -143,8 +143,8 @@ def test_file_does_not_exist_locally_custom_filename(self, mock_urlopen, mock_is mock_urlopen.return_value.__enter__.return_value = mock_source with patch("builtins.open", unittest.mock.mock_open()) as mock_open: - with 
patch("marqo.s2_inference.processing.custom_clip_utils.tqdm") as mock_tqdm: - with patch("marqo.s2_inference.processing.custom_clip_utils.ModelCache") as mock_cache: + with patch("marqo.core.inference.model_download.tqdm") as mock_tqdm: + with patch("marqo.core.inference.model_download.ModelCache") as mock_cache: with tempfile.TemporaryDirectory() as temp_cache_dir: mock_cache.clip_cache_path = temp_cache_dir result = download_pretrained_from_url(self.url, cache_file_name='unusual_model.pt') @@ -164,8 +164,8 @@ def test_file_does_not_exist_locally_custom_cache_dir(self, mock_urlopen, mock_i mock_urlopen.return_value.__enter__.return_value = mock_source with patch("builtins.open", unittest.mock.mock_open()) as mock_open: - with patch("marqo.s2_inference.processing.custom_clip_utils.tqdm") as mock_tqdm: - with patch("marqo.s2_inference.processing.custom_clip_utils.ModelCache") as mock_cache: + with patch("marqo.core.inference.model_download.tqdm") as mock_tqdm: + with patch("marqo.core.inference.model_download.ModelCache") as mock_cache: with tempfile.TemporaryDirectory() as temp_cache_dir: custom_dir = os.path.join(temp_cache_dir, 'special/cache') mock_cache.clip_cache_path = temp_cache_dir @@ -186,8 +186,8 @@ def test_file_does_not_exist_locally_custom_cache_path(self, mock_urlopen, mock_ mock_urlopen.return_value.__enter__.return_value = mock_source with patch("builtins.open", unittest.mock.mock_open()) as mock_open: - with patch("marqo.s2_inference.processing.custom_clip_utils.tqdm") as mock_tqdm: - with patch("marqo.s2_inference.processing.custom_clip_utils.ModelCache") as mock_cache: + with patch("marqo.core.inference.model_download.tqdm") as mock_tqdm: + with patch("marqo.core.inference.model_download.ModelCache") as mock_cache: with tempfile.TemporaryDirectory() as temp_cache_dir: custom_dir = os.path.join(temp_cache_dir, 'special/cache') mock_cache.clip_cache_path = temp_cache_dir From 3640a03006c43f1d192a89162a1c598789ed0037 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 7 Oct 2024 13:50:58 +1100 Subject: [PATCH 37/63] Change name to inference models --- .../core/inference/{models => inference_models}/__init__.py | 0 .../{models => inference_models}/abstract_clip_model.py | 6 +++--- .../abstract_embedding_model.py | 0 .../inference/{models => inference_models}/hf_tokenizer.py | 0 .../{models => inference_models}/hugging_face_model.py | 4 ++-- .../hugging_face_model_properties.py | 0 .../{models => inference_models}/image_download.py | 0 .../{models => inference_models}/open_clip_model.py | 6 +++--- .../open_clip_model_properties.py | 0 src/marqo/s2_inference/clip_utils.py | 6 +++--- src/marqo/s2_inference/model_registry.py | 4 ++-- src/marqo/s2_inference/s2_inference.py | 2 +- tests/core/inference/test_corrupt_file_error_handling.py | 2 +- tests/core/inference/test_hugging_face_model.py | 4 ++-- tests/core/inference/test_hugging_face_model_properties.py | 2 +- .../open_clip_models/test_marqo_fashion_clip.py | 2 +- .../open_clip_models/test_open_clip_model_load.py | 2 +- tests/s2_inference/test_clip_utils.py | 2 +- tests/tensor_search/test_model_auth.py | 2 +- 19 files changed, 22 insertions(+), 22 deletions(-) rename src/marqo/core/inference/{models => inference_models}/__init__.py (100%) rename src/marqo/core/inference/{models => inference_models}/abstract_clip_model.py (93%) rename src/marqo/core/inference/{models => inference_models}/abstract_embedding_model.py (100%) rename src/marqo/core/inference/{models => inference_models}/hf_tokenizer.py (100%) rename 
src/marqo/core/inference/{models => inference_models}/hugging_face_model.py (97%) rename src/marqo/core/inference/{models => inference_models}/hugging_face_model_properties.py (100%) rename src/marqo/core/inference/{models => inference_models}/image_download.py (100%) rename src/marqo/core/inference/{models => inference_models}/open_clip_model.py (97%) rename src/marqo/core/inference/{models => inference_models}/open_clip_model_properties.py (100%) diff --git a/src/marqo/core/inference/models/__init__.py b/src/marqo/core/inference/inference_models/__init__.py similarity index 100% rename from src/marqo/core/inference/models/__init__.py rename to src/marqo/core/inference/inference_models/__init__.py diff --git a/src/marqo/core/inference/models/abstract_clip_model.py b/src/marqo/core/inference/inference_models/abstract_clip_model.py similarity index 93% rename from src/marqo/core/inference/models/abstract_clip_model.py rename to src/marqo/core/inference/inference_models/abstract_clip_model.py index a7297c047..466687900 100644 --- a/src/marqo/core/inference/models/abstract_clip_model.py +++ b/src/marqo/core/inference/inference_models/abstract_clip_model.py @@ -2,10 +2,10 @@ from PIL import UnidentifiedImageError -from marqo.core.inference.models.abstract_embedding_model import AbstractEmbeddingModel +from marqo.core.inference.inference_models.abstract_embedding_model import AbstractEmbeddingModel from marqo.s2_inference.types import * -from marqo.core.inference.models.image_download import (_is_image, format_and_load_CLIP_images, - format_and_load_CLIP_image) +from marqo.core.inference.inference_models.image_download import (_is_image, format_and_load_CLIP_images, + format_and_load_CLIP_image) from marqo.s2_inference.logger import get_logger import torch diff --git a/src/marqo/core/inference/models/abstract_embedding_model.py b/src/marqo/core/inference/inference_models/abstract_embedding_model.py similarity index 100% rename from src/marqo/core/inference/models/abstract_embedding_model.py rename to src/marqo/core/inference/inference_models/abstract_embedding_model.py diff --git a/src/marqo/core/inference/models/hf_tokenizer.py b/src/marqo/core/inference/inference_models/hf_tokenizer.py similarity index 100% rename from src/marqo/core/inference/models/hf_tokenizer.py rename to src/marqo/core/inference/inference_models/hf_tokenizer.py diff --git a/src/marqo/core/inference/models/hugging_face_model.py b/src/marqo/core/inference/inference_models/hugging_face_model.py similarity index 97% rename from src/marqo/core/inference/models/hugging_face_model.py rename to src/marqo/core/inference/inference_models/hugging_face_model.py index 486d464b6..a8c98e4e9 100644 --- a/src/marqo/core/inference/models/hugging_face_model.py +++ b/src/marqo/core/inference/inference_models/hugging_face_model.py @@ -11,8 +11,8 @@ from marqo import marqo_docs from marqo.core.inference.model_download import download_model -from marqo.core.inference.models.abstract_embedding_model import AbstractEmbeddingModel -from marqo.core.inference.models.hugging_face_model_properties import HuggingFaceModelProperties, PoolingMethod +from marqo.core.inference.inference_models.abstract_embedding_model import AbstractEmbeddingModel +from marqo.core.inference.inference_models.hugging_face_model_properties import HuggingFaceModelProperties, PoolingMethod from marqo.s2_inference.configs import ModelCache from marqo.s2_inference.errors import InvalidModelPropertiesError from marqo.s2_inference.types import Union, FloatTensor, List diff --git 
a/src/marqo/core/inference/models/hugging_face_model_properties.py b/src/marqo/core/inference/inference_models/hugging_face_model_properties.py similarity index 100% rename from src/marqo/core/inference/models/hugging_face_model_properties.py rename to src/marqo/core/inference/inference_models/hugging_face_model_properties.py diff --git a/src/marqo/core/inference/models/image_download.py b/src/marqo/core/inference/inference_models/image_download.py similarity index 100% rename from src/marqo/core/inference/models/image_download.py rename to src/marqo/core/inference/inference_models/image_download.py diff --git a/src/marqo/core/inference/models/open_clip_model.py b/src/marqo/core/inference/inference_models/open_clip_model.py similarity index 97% rename from src/marqo/core/inference/models/open_clip_model.py rename to src/marqo/core/inference/inference_models/open_clip_model.py index c8106c169..9947f7db8 100644 --- a/src/marqo/core/inference/models/open_clip_model.py +++ b/src/marqo/core/inference/inference_models/open_clip_model.py @@ -7,12 +7,12 @@ from torchvision.transforms import Compose from marqo import marqo_docs -from marqo.core.inference.models.abstract_clip_model import AbstractCLIPModel -from marqo.core.inference.models.open_clip_model_properties import OpenCLIPModelProperties, ImagePreprocessor +from marqo.core.inference.inference_models.abstract_clip_model import AbstractCLIPModel +from marqo.core.inference.inference_models.open_clip_model_properties import OpenCLIPModelProperties, ImagePreprocessor from marqo.s2_inference.configs import ModelCache from marqo.s2_inference.errors import InvalidModelPropertiesError from marqo.s2_inference.logger import get_logger -from marqo.core.inference.models.hf_tokenizer import HFTokenizer +from marqo.core.inference.inference_models.hf_tokenizer import HFTokenizer from marqo.core.inference.model_download import download_model from marqo.s2_inference.types import * from marqo.tensor_search.models.private_models import ModelLocation diff --git a/src/marqo/core/inference/models/open_clip_model_properties.py b/src/marqo/core/inference/inference_models/open_clip_model_properties.py similarity index 100% rename from src/marqo/core/inference/models/open_clip_model_properties.py rename to src/marqo/core/inference/inference_models/open_clip_model_properties.py diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index bb0fb285c..20f9fd800 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -21,12 +21,12 @@ from marqo import marqo_docs from marqo.api.exceptions import InternalError -from marqo.core.inference.models.abstract_clip_model import AbstractCLIPModel -from marqo.core.inference.models.open_clip_model_properties import OpenCLIPModelProperties, ImagePreprocessor +from marqo.core.inference.inference_models.abstract_clip_model import AbstractCLIPModel +from marqo.core.inference.inference_models.open_clip_model_properties import OpenCLIPModelProperties, ImagePreprocessor from marqo.s2_inference.configs import ModelCache from marqo.s2_inference.errors import InvalidModelPropertiesError, ImageDownloadError from marqo.s2_inference.logger import get_logger -from marqo.core.inference.models.hf_tokenizer import HFTokenizer +from marqo.core.inference.inference_models.hf_tokenizer import HFTokenizer from marqo.core.inference.model_download import download_model from marqo.s2_inference.types import * from marqo.tensor_search.enums import ModelProperties, InferenceParams diff 
--git a/src/marqo/s2_inference/model_registry.py b/src/marqo/s2_inference/model_registry.py index d53ff888f..dc06d74d1 100644 --- a/src/marqo/s2_inference/model_registry.py +++ b/src/marqo/s2_inference/model_registry.py @@ -1,7 +1,7 @@ from marqo.s2_inference.clip_utils import CLIP, MULTILINGUAL_CLIP, FP16_CLIP, \ get_multilingual_clip_properties -from marqo.core.inference.models.open_clip_model import OPEN_CLIP -from marqo.core.inference.models.hugging_face_model import HuggingFaceModel +from marqo.core.inference.inference_models.open_clip_model import OPEN_CLIP +from marqo.core.inference.inference_models.hugging_face_model import HuggingFaceModel from marqo.s2_inference.onnx_clip_utils import CLIP_ONNX from marqo.s2_inference.random_utils import Random from marqo.s2_inference.sbert_onnx_utils import SBERT_ONNX diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index 89f6bd3cd..69d8adbf2 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -15,7 +15,7 @@ from marqo import marqo_docs from marqo.api.exceptions import ModelCacheManagementError, ConfigurationError, InternalError from marqo.s2_inference import constants -from marqo.core.inference.models.open_clip_model import OPEN_CLIP +from marqo.core.inference.inference_models.open_clip_model import OPEN_CLIP from marqo.s2_inference.clip_utils import CLIP from marqo.s2_inference.configs import get_default_normalization, get_default_seq_length from marqo.s2_inference.errors import ( diff --git a/tests/core/inference/test_corrupt_file_error_handling.py b/tests/core/inference/test_corrupt_file_error_handling.py index 48b555662..a30bc439f 100644 --- a/tests/core/inference/test_corrupt_file_error_handling.py +++ b/tests/core/inference/test_corrupt_file_error_handling.py @@ -4,7 +4,7 @@ import pytest -from marqo.core.inference.models.hugging_face_model import HuggingFaceModel +from marqo.core.inference.inference_models.hugging_face_model import HuggingFaceModel from marqo.s2_inference.errors import InvalidModelPropertiesError from marqo.s2_inference.s2_inference import _load_model diff --git a/tests/core/inference/test_hugging_face_model.py b/tests/core/inference/test_hugging_face_model.py index 46b2ecbd1..5fc32e7a8 100644 --- a/tests/core/inference/test_hugging_face_model.py +++ b/tests/core/inference/test_hugging_face_model.py @@ -3,10 +3,10 @@ from pydantic import ValidationError -from marqo.core.inference.models.hugging_face_model_properties import HuggingFaceModelProperties, PoolingMethod +from marqo.core.inference.inference_models.hugging_face_model_properties import HuggingFaceModelProperties, PoolingMethod from marqo.tensor_search.models.external_apis.hf import HfModelLocation from marqo.tensor_search.models.private_models import ModelLocation -from marqo.core.inference.models.hugging_face_model import HuggingFaceModel +from marqo.core.inference.inference_models.hugging_face_model import HuggingFaceModel from marqo.s2_inference.errors import InvalidModelPropertiesError import numpy as np diff --git a/tests/core/inference/test_hugging_face_model_properties.py b/tests/core/inference/test_hugging_face_model_properties.py index f18251f6e..7884e17e7 100644 --- a/tests/core/inference/test_hugging_face_model_properties.py +++ b/tests/core/inference/test_hugging_face_model_properties.py @@ -3,7 +3,7 @@ from pydantic import ValidationError -from marqo.core.inference.models.hugging_face_model_properties import HuggingFaceModelProperties, PoolingMethod +from 
marqo.core.inference.inference_models.hugging_face_model_properties import HuggingFaceModelProperties, PoolingMethod from marqo.tensor_search.models.external_apis.hf import HfModelLocation from marqo.tensor_search.models.private_models import ModelLocation diff --git a/tests/s2_inference/open_clip_models/test_marqo_fashion_clip.py b/tests/s2_inference/open_clip_models/test_marqo_fashion_clip.py index b45e72f0d..2e25814b8 100644 --- a/tests/s2_inference/open_clip_models/test_marqo_fashion_clip.py +++ b/tests/s2_inference/open_clip_models/test_marqo_fashion_clip.py @@ -2,7 +2,7 @@ import numpy as np -from marqo.core.inference.models.open_clip_model import OPEN_CLIP +from marqo.core.inference.inference_models.open_clip_model import OPEN_CLIP from marqo.s2_inference.model_registry import _get_open_clip_properties from marqo.s2_inference.s2_inference import clear_loaded_models diff --git a/tests/s2_inference/open_clip_models/test_open_clip_model_load.py b/tests/s2_inference/open_clip_models/test_open_clip_model_load.py index 870473f58..8a0130a71 100644 --- a/tests/s2_inference/open_clip_models/test_open_clip_model_load.py +++ b/tests/s2_inference/open_clip_models/test_open_clip_model_load.py @@ -1,7 +1,7 @@ from unittest import TestCase from unittest.mock import patch, MagicMock -from marqo.core.inference.models.open_clip_model import OPEN_CLIP +from marqo.core.inference.inference_models.open_clip_model import OPEN_CLIP from marqo.s2_inference.configs import ModelCache from marqo.s2_inference.model_registry import _get_open_clip_properties diff --git a/tests/s2_inference/test_clip_utils.py b/tests/s2_inference/test_clip_utils.py index 556bc092e..b4d9912f2 100644 --- a/tests/s2_inference/test_clip_utils.py +++ b/tests/s2_inference/test_clip_utils.py @@ -9,7 +9,7 @@ from marqo.api.exceptions import InternalError from marqo.s2_inference import clip_utils, types from marqo.s2_inference.clip_utils import CLIP, FP16_CLIP, MULTILINGUAL_CLIP -from marqo.core.inference.models.open_clip_model import OPEN_CLIP +from marqo.core.inference.inference_models.open_clip_model import OPEN_CLIP from marqo.s2_inference.configs import ModelCache from marqo.s2_inference.errors import ImageDownloadError from marqo.tensor_search.enums import ModelProperties diff --git a/tests/tensor_search/test_model_auth.py b/tests/tensor_search/test_model_auth.py index 516a49663..2210b385b 100644 --- a/tests/tensor_search/test_model_auth.py +++ b/tests/tensor_search/test_model_auth.py @@ -16,7 +16,7 @@ from marqo.s2_inference.s2_inference import clear_loaded_models from transformers import AutoModel, AutoTokenizer from marqo.core.inference.model_download import download_pretrained_from_url -from marqo.core.inference.models.hugging_face_model import HuggingFaceModel +from marqo.core.inference.inference_models.hugging_face_model import HuggingFaceModel import os from marqo.api.exceptions import BadRequestError, ModelNotInCacheError from marqo.tensor_search.models.api_models import BulkSearchQuery, BulkSearchQueryEntity From 5e530475cf405a4a448f6e7599a5c137386dd499 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 7 Oct 2024 14:30:31 +1100 Subject: [PATCH 38/63] Update abstraction --- .../inference_models/abstract_embedding_model.py | 6 ------ .../inference/inference_models/hugging_face_model.py | 7 +++++-- .../inference/inference_models/open_clip_model.py | 12 ++++++++++-- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/marqo/core/inference/inference_models/abstract_embedding_model.py 
b/src/marqo/core/inference/inference_models/abstract_embedding_model.py index 85d7050ea..0b4d09e22 100644 --- a/src/marqo/core/inference/inference_models/abstract_embedding_model.py +++ b/src/marqo/core/inference/inference_models/abstract_embedding_model.py @@ -20,7 +20,6 @@ def __init__(self, model_properties: Optional[dict] = None, device: Optional[str if model_properties is None: model_properties = dict() - self.model_properties = self._build_model_properties(model_properties) self.device = device self.model_auth = model_auth @@ -33,11 +32,6 @@ def load(self): self._load_necessary_components() self._check_loaded_components() - @abstractmethod - def _build_model_properties(self, model_properties: dict): - """Parse the model properties from the user input and convert it to a pydantic model.""" - pass - @abstractmethod def _load_necessary_components(self): """Load the necessary components for the model.""" diff --git a/src/marqo/core/inference/inference_models/hugging_face_model.py b/src/marqo/core/inference/inference_models/hugging_face_model.py index a8c98e4e9..086d0f787 100644 --- a/src/marqo/core/inference/inference_models/hugging_face_model.py +++ b/src/marqo/core/inference/inference_models/hugging_face_model.py @@ -25,15 +25,18 @@ class HuggingFaceModel(AbstractEmbeddingModel): def __init__(self, model_properties: dict, device: str, model_auth: dict): super().__init__(model_properties, device, model_auth) + self.model_properties = self._build_model_properties(model_properties) + self._model = None self._tokenizer = None self._pooling_func = None - def _build_model_properties(self, model_properties: dict): + def _build_model_properties(self, model_properties: dict) -> HuggingFaceModelProperties: + """Convert the user input model_properties to HuggingFaceModelProperties.""" try: return HuggingFaceModelProperties(**model_properties) except ValidationError as e: - raise InvalidModelPropertiesError(f"Invalid model properties for the 'hf' model. Original error {e}") \ + raise InvalidModelPropertiesError(f"Invalid model properties: {model_properties}. Original error {e}") \ from e def _check_loaded_components(self): diff --git a/src/marqo/core/inference/inference_models/open_clip_model.py b/src/marqo/core/inference/inference_models/open_clip_model.py index 9947f7db8..447a954ed 100644 --- a/src/marqo/core/inference/inference_models/open_clip_model.py +++ b/src/marqo/core/inference/inference_models/open_clip_model.py @@ -16,6 +16,7 @@ from marqo.core.inference.model_download import download_model from marqo.s2_inference.types import * from marqo.tensor_search.models.private_models import ModelLocation +from pydantic import ValidationError logger = get_logger(__name__) @@ -33,11 +34,18 @@ def __init__( super().__init__(device, model_properties, model_auth) + self.model_properties = self._build_model_properties(model_properties) + # model_auth gets passed through add_docs and search requests: self.preprocess_config = None - def _build_model_properties(self, model_properties: dict): - return OpenCLIPModelProperties(**model_properties) + def _build_model_properties(self, model_properties: dict) -> OpenCLIPModelProperties: + """Convert the user input model_properties to OpenCLIPModelProperties.""" + try: + return OpenCLIPModelProperties(**model_properties) + except ValidationError as e: + raise InvalidModelPropertiesError(f"Invalid model properties: {model_properties}. 
Original error: {e}") \ + from e def _load_necessary_components(self) -> None: """Load the open_clip model and _tokenizer.""" From 0b2cf0f88b905562a8ee93f61bac02e2afd78b98 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 7 Oct 2024 15:16:10 +1100 Subject: [PATCH 39/63] Fix tests --- .../test_corrupt_file_error_handling.py | 16 ++++++++-------- .../test_hugging_face_model_properties.py | 4 ++-- .../test_open_clip_model_load.py | 4 ++-- tests/tensor_search/test_model_auth.py | 18 +++++++++--------- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/tests/core/inference/test_corrupt_file_error_handling.py b/tests/core/inference/test_corrupt_file_error_handling.py index a30bc439f..fae20c65b 100644 --- a/tests/core/inference/test_corrupt_file_error_handling.py +++ b/tests/core/inference/test_corrupt_file_error_handling.py @@ -62,7 +62,7 @@ def test_corrupted_file_handling(self, mock_os_remove, mock_create_model_and_tra """Ensure that a proper error is raised when a corrupted file is encountered. The file should be removed.""" mock_create_model_and_transforms.side_effect = RuntimeError("The file might be corrupted") for model_properties in self.dummpy_model_properties: - with patch("marqo.core.inference.models.open_clip_model.download_model", + with patch("marqo.core.inference.inference_models.open_clip_model.download_model", return_value = self.dummpy_corrupted_file): with self.assertRaises(InvalidModelPropertiesError) as context: _ = _load_model(**self.load_parameters, model_properties=model_properties) @@ -79,7 +79,7 @@ def test_file_removal_failure_handling(self, mock_os_remove, mock_create_model_a # Setup mock_create_model_and_transforms.side_effect = RuntimeError("The file might be corrupted") mock_os_remove.side_effect = OSError("Permission denied") - with patch("marqo.core.inference.models.open_clip_model.download_model", + with patch("marqo.core.inference.inference_models.open_clip_model.download_model", return_value = self.dummpy_corrupted_file): for model_properties in self.dummpy_model_properties: # Execute and Verify @@ -98,7 +98,7 @@ def test_file_removal_failure_handling(self, mock_os_remove, mock_create_model_a def test_other_errors_handling(self, mock_os_remove, mock_create_model_and_transforms): # Setup mock_create_model_and_transforms.side_effect = Exception("An error occurred") - with patch("marqo.core.inference.models.open_clip_model.download_model", + with patch("marqo.core.inference.inference_models.open_clip_model.download_model", return_value = self.dummpy_corrupted_file): for model_properties in self.dummpy_model_properties: # Execute and Verify @@ -113,7 +113,7 @@ def test_load_clip_into_open_clip_errors_handling(self, mock_os_remove, mock_cre # Setup mock_create_model_and_transforms.side_effect = Exception( "This could be because the operator doesn't exist for this backend") - with patch("marqo.core.inference.models.open_clip_model.download_model", + with patch("marqo.core.inference.inference_models.open_clip_model.download_model", return_value = self.dummpy_corrupted_file): for model_properties in self.dummpy_model_properties: # Execute and Verify @@ -207,7 +207,7 @@ def test_regular_file(self): with patch('os.path.isfile', return_value=True), \ patch('os.path.splitext', return_value=('/path/to/file', '.txt')), \ patch('os.makedirs'), \ - patch("marqo.core.inference.models.hugging_face_model.download_model", return_value = "/path/to/file.txt"): + patch("marqo.core.inference.inference_models.hugging_face_model.download_model", return_value = 
"/path/to/file.txt"): for model_properties in self.dummy_model_properties: with self.assertRaises(RuntimeError) as context: _ = _load_model(**self.load_parameters, model_properties=model_properties) @@ -218,7 +218,7 @@ def test_zip_file(self): patch('os.path.splitext', return_value=('/path/to/file', '.zip')), \ patch('os.makedirs') as mock_makedirs, \ patch('zipfile.ZipFile') as mock_zipfile, \ - patch("marqo.core.inference.models.hugging_face_model.download_model", return_value="/path/to/file.zip"),\ + patch("marqo.core.inference.inference_models.hugging_face_model.download_model", return_value="/path/to/file.zip"),\ patch("transformers.AutoModel.from_pretrained") as mock_model,\ patch("transformers.AutoTokenizer.from_pretrained") as mock_tokenizer: @@ -240,7 +240,7 @@ def test_tar_file(self): patch('os.path.splitext', return_value=('/path/to/file', '.tar')), \ patch('os.makedirs') as mock_makedirs, \ patch('tarfile.open') as mock_tarfile,\ - patch("marqo.core.inference.models.hugging_face_model.download_model", return_value="/path/to/file.tar"), \ + patch("marqo.core.inference.inference_models.hugging_face_model.download_model", return_value="/path/to/file.tar"), \ patch("transformers.AutoModel.from_pretrained") as mock_model, \ patch("transformers.AutoTokenizer.from_pretrained") as mock_tokenizer: @@ -259,7 +259,7 @@ def test_tar_file(self): def test_directory(self): with patch('os.path.isfile', return_value=False),\ - patch("marqo.core.inference.models.hugging_face_model.download_model", return_value="/path/to/file.tar"), \ + patch("marqo.core.inference.inference_models.hugging_face_model.download_model", return_value="/path/to/file.tar"), \ patch("transformers.AutoModel.from_pretrained") as mock_model, \ patch("transformers.AutoTokenizer.from_pretrained") as mock_tokenizer: self.assertEqual(HuggingFaceModel.extract_huggingface_archive('/path/to/directory'), '/path/to/directory') diff --git a/tests/core/inference/test_hugging_face_model_properties.py b/tests/core/inference/test_hugging_face_model_properties.py index 7884e17e7..0d81ae897 100644 --- a/tests/core/inference/test_hugging_face_model_properties.py +++ b/tests/core/inference/test_hugging_face_model_properties.py @@ -46,7 +46,7 @@ def test_invalid_model_with_url_and_model_location(self): def test_infer_pooling_method(self): for pooling_method in (PoolingMethod.Mean, PoolingMethod.CLS): with self.subTest(f"Pooling method inferred from name with {pooling_method}"): - with mock.patch("marqo.core.inference.models.hugging_face_model_properties." + with mock.patch("marqo.core.inference.inference_models.hugging_face_model_properties." "HuggingFaceModelProperties._infer_pooling_method_from_name", return_value = pooling_method) as mock_infer: model = HuggingFaceModelProperties(name="model-with-cls", type="hf") @@ -54,7 +54,7 @@ def test_infer_pooling_method(self): self.assertEqual(pooling_method, model.pooling_method) def test_explicit_valid_pooling_method(self): - with mock.patch("marqo.core.inference.models.hugging_face_model_properties." + with mock.patch("marqo.core.inference.inference_models.hugging_face_model_properties." 
"HuggingFaceModelProperties._infer_pooling_method_from_name") as mock_infer: model = HuggingFaceModelProperties(name="test-model", type="hf", pooling_method=PoolingMethod.CLS) self.assertEqual(model.pooling_method, PoolingMethod.CLS) diff --git a/tests/s2_inference/open_clip_models/test_open_clip_model_load.py b/tests/s2_inference/open_clip_models/test_open_clip_model_load.py index 8a0130a71..7fe31bfe6 100644 --- a/tests/s2_inference/open_clip_models/test_open_clip_model_load.py +++ b/tests/s2_inference/open_clip_models/test_open_clip_model_load.py @@ -27,9 +27,9 @@ def test_load_OpenCLIPModelFromCheckPointMethod_success(self): "type": "open_clip" } - with patch("marqo.core.inference.models.open_clip_model import OPEN_CLIP._load_model_and_image_preprocessor_from_checkpoint", \ + with patch("marqo.core.inference.inference_models.open_clip_model import OPEN_CLIP._load_model_and_image_preprocessor_from_checkpoint", \ return_value=(MagicMock(), MagicMock())) as mock_load_method: - with patch("marqo.core.inference.models.open_clip_model import OPEN_CLIP._load_tokenizer_from_checkpoint", + with patch("marqo.core.inference.inference_models.open_clip_model import OPEN_CLIP._load_tokenizer_from_checkpoint", return_value=MagicMock()) as mock_load_tokenizer: with patch.object(MagicMock(), 'eval', return_value=None) as mock_eval: model = OPEN_CLIP(model_properties=model_properties, device="cpu") diff --git a/tests/tensor_search/test_model_auth.py b/tests/tensor_search/test_model_auth.py index 2210b385b..cd9128d6a 100644 --- a/tests/tensor_search/test_model_auth.py +++ b/tests/tensor_search/test_model_auth.py @@ -1236,7 +1236,7 @@ def test_1_load_model_from_hf_zip_file_with_auth_search(self): with unittest.mock.patch('transformers.AutoModel.from_pretrained', mock_automodel_from_pretrained): with unittest.mock.patch('transformers.AutoTokenizer.from_pretrained', mock_autotokenizer_from_pretrained): with unittest.mock.patch('marqo.s2_inference.model_downloading.from_hf.hf_hub_download', mock_hf_hub_download): - with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.HuggingFaceModel." + with unittest.mock.patch("marqo.core.inference.inference_models.hugging_face_model.HuggingFaceModel." "extract_huggingface_archive", mock_extract_huggingface_archive): try: res = tensor_search.search( @@ -1297,7 +1297,7 @@ def test_2_load_model_from_hf_zip_file_without_auth_search(self): with unittest.mock.patch('transformers.AutoModel.from_pretrained', mock_automodel_from_pretrained): with unittest.mock.patch('transformers.AutoTokenizer.from_pretrained', mock_autotokenizer_from_pretrained): with unittest.mock.patch('marqo.s2_inference.model_downloading.from_hf.hf_hub_download', mock_hf_hub_download): - with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.HuggingFaceModel." + with unittest.mock.patch("marqo.core.inference.inference_models.hugging_face_model.HuggingFaceModel." "extract_huggingface_archive", mock_extract_huggingface_archive): try: res = tensor_search.search( @@ -1369,7 +1369,7 @@ def test_3_load_model_from_s3_zip_file_with_auth_search(self): with unittest.mock.patch('transformers.AutoTokenizer.from_pretrained',mock_autotokenizer_from_pretrained): with unittest.mock.patch('boto3.client', return_value=mock_s3_client) as mock_boto3_client: with unittest.mock.patch("marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_url", mock_download_pretrained_from_url): - with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.HuggingFaceModel." 
+ with unittest.mock.patch("marqo.core.inference.inference_models.hugging_face_model.HuggingFaceModel." "extract_huggingface_archive", mock_extract_huggingface_archive): try: res = tensor_search.search( @@ -1424,7 +1424,7 @@ def test_4_load_model_from_public_url_zip_file_search(self): with mock.patch('transformers.AutoModel.from_pretrained', new=mock_automodel_from_pretrained): with mock.patch('marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_url', new=mock_download): - with mock.patch("marqo.core.inference.models.hugging_face_model.HuggingFaceModel." + with mock.patch("marqo.core.inference.inference_models.hugging_face_model.HuggingFaceModel." "extract_huggingface_archive", new=mock_extract_huggingface_archive): res = tensor_search.search(config=self.config, text='hello', index_name=self.index_name_1) @@ -1571,7 +1571,7 @@ def test_1_load_model_from_hf_zip_file_with_auth_add_documents(self): with unittest.mock.patch('transformers.AutoModel.from_pretrained', mock_automodel_from_pretrained): with unittest.mock.patch('transformers.AutoTokenizer.from_pretrained', mock_autotokenizer_from_pretrained): with unittest.mock.patch('marqo.s2_inference.model_downloading.from_hf.hf_hub_download', mock_hf_hub_download): - with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.HuggingFaceModel." + with unittest.mock.patch("marqo.core.inference.inference_models.hugging_face_model.HuggingFaceModel." "extract_huggingface_archive", mock_extract_huggingface_archive): try: tensor_search.add_documents(config=self.config, add_docs_params=AddDocsParams( @@ -1632,7 +1632,7 @@ def test_2_load_model_from_hf_zip_file_without_auth_add_documents(self): with unittest.mock.patch('transformers.AutoModel.from_pretrained', mock_automodel_from_pretrained): with unittest.mock.patch('transformers.AutoTokenizer.from_pretrained', mock_autotokenizer_from_pretrained): with unittest.mock.patch('marqo.s2_inference.model_downloading.from_hf.hf_hub_download', mock_hf_hub_download): - with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.HuggingFaceModel." + with unittest.mock.patch("marqo.core.inference.inference_models.hugging_face_model.HuggingFaceModel." "extract_huggingface_archive", mock_extract_huggingface_archive): try: tensor_search.add_documents(config=self.config, add_docs_params=AddDocsParams( @@ -1704,7 +1704,7 @@ def test_3_load_model_from_s3_zip_file_with_auth_add_documents(self): with unittest.mock.patch( "marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_url", mock_download_pretrained_from_url): - with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.HuggingFaceModel." + with unittest.mock.patch("marqo.core.inference.inference_models.hugging_face_model.HuggingFaceModel." "extract_huggingface_archive", mock_extract_huggingface_archive): try: @@ -1762,7 +1762,7 @@ def test_4_load_model_from_public_url_zip_file_add_documents(self): with mock.patch('transformers.AutoModel.from_pretrained', new=mock_automodel_from_pretrained): with mock.patch('marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_url', new=mock_download): - with mock.patch("marqo.core.inference.models.hugging_face_model.HuggingFaceModel." + with mock.patch("marqo.core.inference.inference_models.hugging_face_model.HuggingFaceModel." 
"extract_huggingface_archive", new=mock_extract_huggingface_archive): tensor_search.add_documents(config=self.config, add_docs_params=AddDocsParams( index_name=self.index_name_1, auto_refresh=True, docs=[{'a': 'b'}], device="cpu")) @@ -2484,7 +2484,7 @@ def test_bulk_search(self): with unittest.mock.patch('transformers.AutoTokenizer.from_pretrained', mock_autotokenizer_from_pretrained): with unittest.mock.patch('boto3.client', return_value=mock_s3_client) as mock_boto3_client: with unittest.mock.patch("marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_url",mock_download_pretrained_from_url): - with unittest.mock.patch("marqo.core.inference.models.hugging_face_model.HuggingFaceModel." + with unittest.mock.patch("marqo.core.inference.inference_models.hugging_face_model.HuggingFaceModel." "extract_huggingface_archive", mock_extract_huggingface_archive): try: tensor_search.bulk_search( From 42a56c54e8bec997994784eb46c4607f326d1a66 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 7 Oct 2024 16:51:30 +1100 Subject: [PATCH 40/63] Fix tests --- examples/GPT-examples/utilities.py | 2 +- .../inference_models/abstract_clip_model.py | 2 +- .../inference_models/open_clip_model.py | 6 +++--- .../test_open_clip_model_load.py | 21 ++++++++++--------- 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/examples/GPT-examples/utilities.py b/examples/GPT-examples/utilities.py index 0931e40bb..23ae6f0be 100644 --- a/examples/GPT-examples/utilities.py +++ b/examples/GPT-examples/utilities.py @@ -205,7 +205,7 @@ def _lies_between(offset_tuple, offset): def _find_end_character_mapping(offset_mapping, offset): """assumes sorted offset_mapping. unless this was modified - this will be the default from the _tokenizer + this will be the default from the tokenizer """ # if the max length is bigger we just return the last index if offset >= max(offset_mapping[-1]): diff --git a/src/marqo/core/inference/inference_models/abstract_clip_model.py b/src/marqo/core/inference/inference_models/abstract_clip_model.py index 466687900..c62363533 100644 --- a/src/marqo/core/inference/inference_models/abstract_clip_model.py +++ b/src/marqo/core/inference/inference_models/abstract_clip_model.py @@ -20,7 +20,7 @@ class AbstractCLIPModel(AbstractEmbeddingModel): model_properties (dict): A dictionary containing additional properties or configurations specific to the model. Defaults to an empty dictionary if not provided. model: The actual CLIP model instance, initialized to `None` and to be set by subclasses. - tokenizer: The _tokenizer associated with the model, initialized to `None` and to be set by subclasses. + tokenizer: The tokenizer associated with the model, initialized to `None` and to be set by subclasses. preprocess: The preprocessing pipeline for the model, initialized to `None` and to be set by subclasses. 
""" diff --git a/src/marqo/core/inference/inference_models/open_clip_model.py b/src/marqo/core/inference/inference_models/open_clip_model.py index 447a954ed..62ced98f6 100644 --- a/src/marqo/core/inference/inference_models/open_clip_model.py +++ b/src/marqo/core/inference/inference_models/open_clip_model.py @@ -48,7 +48,7 @@ def _build_model_properties(self, model_properties: dict) -> OpenCLIPModelProper from e def _load_necessary_components(self) -> None: - """Load the open_clip model and _tokenizer.""" + """Load the open_clip model and tokenizer.""" if self.model_properties.url is not None or self.model_properties.model_location is not None: self.model, self.preprocess = self._load_model_and_image_preprocessor_from_checkpoint() self.tokenizer = self._load_tokenizer_from_checkpoint() @@ -205,11 +205,11 @@ def _load_model_and_image_preprocessor_from_open_clip_repo(self) -> Tuple[torch. return model, preprocess def _load_tokenizer_from_checkpoint(self) -> Callable: - if not self.model_properties._tokenizer: + if not self.model_properties.tokenizer: return open_clip.get_tokenizer(self.model_properties.name) else: logger.info(f"Custom HFTokenizer is provided. Loading...") - return HFTokenizer(self.model_properties._tokenizer) + return HFTokenizer(self.model_properties.tokenizer) def _load_tokenizer_from_hf_repo(self) -> Callable: return open_clip.get_tokenizer(self.model_properties.name) diff --git a/tests/s2_inference/open_clip_models/test_open_clip_model_load.py b/tests/s2_inference/open_clip_models/test_open_clip_model_load.py index 7fe31bfe6..621322d9b 100644 --- a/tests/s2_inference/open_clip_models/test_open_clip_model_load.py +++ b/tests/s2_inference/open_clip_models/test_open_clip_model_load.py @@ -3,6 +3,7 @@ from marqo.core.inference.inference_models.open_clip_model import OPEN_CLIP from marqo.s2_inference.configs import ModelCache +from marqo.s2_inference.errors import InvalidModelPropertiesError from marqo.s2_inference.model_registry import _get_open_clip_properties OPEN_CLIP_MODEL_PROPERTIES = _get_open_clip_properties() @@ -27,9 +28,9 @@ def test_load_OpenCLIPModelFromCheckPointMethod_success(self): "type": "open_clip" } - with patch("marqo.core.inference.inference_models.open_clip_model import OPEN_CLIP._load_model_and_image_preprocessor_from_checkpoint", \ + with patch("marqo.core.inference.inference_models.open_clip_model.OPEN_CLIP._load_model_and_image_preprocessor_from_checkpoint", \ return_value=(MagicMock(), MagicMock())) as mock_load_method: - with patch("marqo.core.inference.inference_models.open_clip_model import OPEN_CLIP._load_tokenizer_from_checkpoint", + with patch("marqo.core.inference.inference_models.open_clip_model.OPEN_CLIP._load_tokenizer_from_checkpoint", return_value=MagicMock()) as mock_load_tokenizer: with patch.object(MagicMock(), 'eval', return_value=None) as mock_eval: model = OPEN_CLIP(model_properties=model_properties, device="cpu") @@ -45,11 +46,11 @@ def test_load_OpenCLIPModelFromCheckPointParameters_success(self): "url": "https://openclipart.org/download/12345/my_test_model.pt", "type": "open_clip" } - with patch("marqo.s2_inference.clip_utils.open_clip.create_model", return_value=MagicMock()) \ + with patch("marqo.core.inference.inference_models.open_clip_model.open_clip.create_model", return_value=MagicMock()) \ as mock_create_model: - with patch("marqo.s2_inference.clip_utils.open_clip.get_tokenizer", return_value=MagicMock()) \ + with patch("marqo.core.inference.inference_models.open_clip_model.open_clip.get_tokenizer", 
return_value=MagicMock()) \ as mock_tokenizer: - with patch("marqo.s2_inference.clip_utils.download_model", return_value="my_test_model.pt"): + with patch("marqo.core.inference.inference_models.open_clip_model.download_model", return_value="my_test_model.pt"): with patch.object(MagicMock(), 'eval', return_value=None) as mock_eval: model = OPEN_CLIP(model_properties=model_properties, device="cpu") model.load() @@ -80,11 +81,11 @@ def test_load_OpenCLIPModelFromCheckPointPreprocessConfig(self): "image_preprocessor": "SigLIP", "size": 322 # Override the default size 224 } - with patch("marqo.s2_inference.clip_utils.open_clip.create_model", return_value=MagicMock()) \ + with patch("marqo.core.inference.inference_models.open_clip_model.open_clip.create_model", return_value=MagicMock()) \ as mock_create_model: - with patch("marqo.s2_inference.clip_utils.open_clip.get_tokenizer", return_value=MagicMock()) \ + with patch("marqo.core.inference.inference_models.open_clip_model.open_clip.get_tokenizer", return_value=MagicMock()) \ as mock_tokenizer: - with patch("marqo.s2_inference.clip_utils.download_model", return_value="my_test_model.pt"): + with patch("marqo.core.inference.inference_models.open_clip_model.download_model", return_value="my_test_model.pt"): with patch.object(MagicMock(), 'eval', return_value=None) as mock_eval: model = OPEN_CLIP(model_properties=model_properties, device="cpu") model.load() @@ -156,7 +157,7 @@ def test_load_OpenCLIPModel_missing_model_properties(self): # Missing 'name' and 'url' } - with self.assertRaises(ValueError) as context: + with self.assertRaises(InvalidModelPropertiesError) as context: model = OPEN_CLIP(model_properties=model_properties, device="cpu") model.load() @@ -172,7 +173,7 @@ def test_load_OpenCLIPModel_unsupported_image_preprocessor(self): "image_preprocessor": "UnsupportedPreprocessor" } - with self.assertRaises(ValueError) as context: + with self.assertRaises(InvalidModelPropertiesError) as context: model = OPEN_CLIP(model_properties=model_properties, device="cpu") model.load() From 7aa5bef74e2787822423c566a8f3adfad7a8c15e Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 7 Oct 2024 18:15:11 +1100 Subject: [PATCH 41/63] Fix tests --- tests/core/inference/test_hugging_face_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/core/inference/test_hugging_face_model.py b/tests/core/inference/test_hugging_face_model.py index 5fc32e7a8..2535ced5a 100644 --- a/tests/core/inference/test_hugging_face_model.py +++ b/tests/core/inference/test_hugging_face_model.py @@ -609,7 +609,7 @@ def test_initialize_huggingface_model_with_invalid_properties(self): with self.subTest(test_case=test_case): with self.assertRaises(InvalidModelPropertiesError) as excinfo: _ = HuggingFaceModel(test_case, "cpu", {}) - self.assertIn("Invalid model properties for the 'hf' model.", str(excinfo.exception)) + self.assertIn("Invalid model properties: ", str(excinfo.exception)) def test_hf_e5_base_v2_embeddings_load_from_hf(self): """A test to ensure the embeddings are generated correctly for the default text model, loading from From 835f6bc5c4d0889015007c9a200a33ca9c027037 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 10 Oct 2024 11:09:47 +1100 Subject: [PATCH 42/63] Fix regression --- .../abstract_embedding_model.py | 3 +- .../inference_models/hugging_face_model.py | 44 +++++++++++++++++-- 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/src/marqo/core/inference/inference_models/abstract_embedding_model.py 
b/src/marqo/core/inference/inference_models/abstract_embedding_model.py index 0b4d09e22..016ad46cd 100644 --- a/src/marqo/core/inference/inference_models/abstract_embedding_model.py +++ b/src/marqo/core/inference/inference_models/abstract_embedding_model.py @@ -1,12 +1,13 @@ from abc import ABC, abstractmethod from typing import Optional +from marqo.tensor_search.models.private_models import ModelAuth class AbstractEmbeddingModel(ABC): """This is the abstract base class for all models in Marqo.""" def __init__(self, model_properties: Optional[dict] = None, device: Optional[str] = None, - model_auth: Optional[dict] = None): + model_auth: Optional[ModelAuth] = None): """Load the model with the given properties. Args: diff --git a/src/marqo/core/inference/inference_models/hugging_face_model.py b/src/marqo/core/inference/inference_models/hugging_face_model.py index 086d0f787..cb7f4d091 100644 --- a/src/marqo/core/inference/inference_models/hugging_face_model.py +++ b/src/marqo/core/inference/inference_models/hugging_face_model.py @@ -1,7 +1,7 @@ import os import tarfile import zipfile -from typing import Tuple, Callable +from typing import Tuple, Callable, Optional import numpy as np import torch @@ -17,12 +17,13 @@ from marqo.s2_inference.errors import InvalidModelPropertiesError from marqo.s2_inference.types import Union, FloatTensor, List from marqo.core.exceptions import InternalError +from marqo.tensor_search.models.private_models import ModelAuth class HuggingFaceModel(AbstractEmbeddingModel): """The concrete class for all sentence transformers models loaded from Hugging Face.""" - def __init__(self, model_properties: dict, device: str, model_auth: dict): + def __init__(self, model_properties: dict, device: str, model_auth: Optional[ModelAuth] = None): super().__init__(model_properties, device, model_auth) self.model_properties = self._build_model_properties(model_properties) @@ -50,8 +51,22 @@ def _check_loaded_components(self): def _load_necessary_components(self): if self.model_properties.name: self._model, self._tokenizer = self._load_from_hugging_face_repo() - elif self.model_properties.url or self.model_properties.model_location: + elif self.model_properties.url: self._model, self._tokenizer = self._load_from_zip_file() + elif self.model_properties.model_location: + if self.model_properties.model_location.s3: + self._model, self._tokenizer = self._load_from_zip_file() + elif self.model_properties.model_location.hf: + if self.model_properties.model_location.hf.filename: + self._model, self._tokenizer = self._load_from_zip_file() + else: + self._model, self._tokenizer = self._load_from_private_hugging_face_repo() + else: + raise InvalidModelPropertiesError( + f"Invalid model properties for the 'hf' model. " + f"You do not have the necessary information to load the model. " + f"Check {marqo_docs.bring_your_own_model()} for more information." + ) else: raise InvalidModelPropertiesError( f"Invalid model properties for the 'hf' model. 
" @@ -62,6 +77,29 @@ def _load_necessary_components(self): self._pooling_func = self._load_pooling_method() self._model.eval() + def _load_from_private_hugging_face_repo(self) -> Tuple: + """Load the model from the private Hugging Face model hub based on the model_location.""" + + hf_repo_token = None + if self.model_auth is not None and self.model_auth.hf is not None: + hf_repo_token = self.model_auth.hf.token + + try: + model = AutoModel.from_pretrained( + self.model_properties.model_location.hf.repo_id, + use_auth_token=hf_repo_token + ) + tokenizer = AutoTokenizer.from_pretrained( + self.model_properties.model_location.hf.repo_id, + use_auth_token=hf_repo_token + ) + except (OSError, ValueError, RuntimeError) as e: + raise InvalidModelPropertiesError( + f"Marqo encountered an error loading the private Hugging Face model, modelProperties={self.model_properties}. " + f"Please ensure that the model is a valid Hugging Face model and retry.\n" + f" Original error message = {e}") from e + return model, tokenizer + def _load_from_hugging_face_repo(self) -> Tuple: """Load the model from the Hugging Face model hub based on the repo_id.""" try: From bcb47fdd5cf0dc22bdd6bc4272ff4099ec6a04e3 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 10 Oct 2024 11:13:39 +1100 Subject: [PATCH 43/63] Fix tests --- .../inference_models/hugging_face_model.py | 44 +++++++++++++++++-- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/src/marqo/core/inference/inference_models/hugging_face_model.py b/src/marqo/core/inference/inference_models/hugging_face_model.py index 086d0f787..c943823f4 100644 --- a/src/marqo/core/inference/inference_models/hugging_face_model.py +++ b/src/marqo/core/inference/inference_models/hugging_face_model.py @@ -1,7 +1,7 @@ import os import tarfile import zipfile -from typing import Tuple, Callable +from typing import Tuple, Callable, Optional import numpy as np import torch @@ -17,12 +17,13 @@ from marqo.s2_inference.errors import InvalidModelPropertiesError from marqo.s2_inference.types import Union, FloatTensor, List from marqo.core.exceptions import InternalError +from marqo.tensor_search.models.private_models import ModelAuth class HuggingFaceModel(AbstractEmbeddingModel): """The concrete class for all sentence transformers models loaded from Hugging Face.""" - def __init__(self, model_properties: dict, device: str, model_auth: dict): + def __init__(self, model_properties: dict, device: str, model_auth: Optional[ModelAuth] = None): super().__init__(model_properties, device, model_auth) self.model_properties = self._build_model_properties(model_properties) @@ -50,8 +51,22 @@ def _check_loaded_components(self): def _load_necessary_components(self): if self.model_properties.name: self._model, self._tokenizer = self._load_from_hugging_face_repo() - elif self.model_properties.url or self.model_properties.model_location: + elif self.model_properties.url: self._model, self._tokenizer = self._load_from_zip_file() + elif self.model_properties.model_location: + if self.model_properties.model_location.s3: + self._model, self._tokenizer = self._load_from_zip_file() + elif self.model_properties.model_location.hf: + if self.model_properties.model_location.hf.filename: + self._model, self._tokenizer = self._load_from_zip_file() + else: + self._model, self._tokenizer = self._load_from_private_hugging_face_repo() + else: + raise InvalidModelPropertiesError( + f"Invalid model properties for the 'hf' model. " + f"You do not have the necessary information to load the model. 
" + f"Check {marqo_docs.bring_your_own_model()} for more information." + ) else: raise InvalidModelPropertiesError( f"Invalid model properties for the 'hf' model. " @@ -62,6 +77,29 @@ def _load_necessary_components(self): self._pooling_func = self._load_pooling_method() self._model.eval() + def _load_from_private_hugging_face_repo(self) -> Tuple: + """Load the model from the private Hugging Face model hub based on the model_location.""" + + hf_repo_token = None + if self.model_auth is not None and self.model_auth.hf is not None: + hf_repo_token = self.model_auth.hf.token + + try: + model = AutoModel.from_pretrained( + self.model_properties.model_location.hf.repo_id, + use_auth_token=hf_repo_token + ) + tokenizer = AutoTokenizer.from_pretrained( + self.model_properties.model_location.hf.repo_id, + use_auth_token=hf_repo_token + ) + except (OSError, ValueError, RuntimeError) as e: + raise InvalidModelPropertiesError( + f"Marqo encountered an error loading the private Hugging Face model, modelProperties={self.model_properties}. " + f"Please ensure that the model is a valid Hugging Face model and you have provided the right token. " + f"Original error message = {e}") from e + return model, tokenizer + def _load_from_hugging_face_repo(self) -> Tuple: """Load the model from the Hugging Face model hub based on the repo_id.""" try: From 2a1132af1132ec79e9cf8cd7d206f6354a14ff15 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Thu, 10 Oct 2024 11:19:45 +1100 Subject: [PATCH 44/63] Fix load path regression --- .../core/inference/inference_models/hugging_face_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/marqo/core/inference/inference_models/hugging_face_model.py b/src/marqo/core/inference/inference_models/hugging_face_model.py index cb7f4d091..7b0374776 100644 --- a/src/marqo/core/inference/inference_models/hugging_face_model.py +++ b/src/marqo/core/inference/inference_models/hugging_face_model.py @@ -87,11 +87,11 @@ def _load_from_private_hugging_face_repo(self) -> Tuple: try: model = AutoModel.from_pretrained( self.model_properties.model_location.hf.repo_id, - use_auth_token=hf_repo_token + token=hf_repo_token ) tokenizer = AutoTokenizer.from_pretrained( self.model_properties.model_location.hf.repo_id, - use_auth_token=hf_repo_token + token=hf_repo_token ) except (OSError, ValueError, RuntimeError) as e: raise InvalidModelPropertiesError( From 4ad03a435b11ee9b18052a64720148ec38703178 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 11 Oct 2024 16:46:28 +1100 Subject: [PATCH 45/63] update dependencies --- .../abstract_embedding_model.py | 39 ++++++++++++++++++- .../inference_models/hugging_face_model.py | 33 +++++++++------- 2 files changed, 56 insertions(+), 16 deletions(-) diff --git a/src/marqo/core/inference/inference_models/abstract_embedding_model.py b/src/marqo/core/inference/inference_models/abstract_embedding_model.py index 016ad46cd..15e881b84 100644 --- a/src/marqo/core/inference/inference_models/abstract_embedding_model.py +++ b/src/marqo/core/inference/inference_models/abstract_embedding_model.py @@ -1,7 +1,10 @@ from abc import ABC, abstractmethod -from typing import Optional +from typing import Optional, Any from marqo.tensor_search.models.private_models import ModelAuth +from marqo.s2_inference.multimodal_model_load import Modality +import numpy as np + class AbstractEmbeddingModel(ABC): """This is the abstract base class for all models in Marqo.""" @@ -48,5 +51,37 @@ def _check_loaded_components(self): pass @abstractmethod - def encode(self): + def 
_validate_content_type(self, content: Any, modality: Modality): + """Validate if the provided content type is valid for the specific model and if it matches the modality. + + Raise: + ValueError: If the content type is not valid. + """ + pass + + @abstractmethod + def _encode(self, content: Any, modality: Modality, normalize: bool = True) -> np.ndarray: + """Encode the given content. + + Args: + content (Any): The content to encode. + normalize (bool): Whether to normalize the output or not. + """ pass + + @abstractmethod + def _set_default_modality(self) -> Modality: + """Set the default modality for the model. + + Returns: + Modality: The default modality for the model. + """ + pass + + @abstractmethod + def encode(self, content: Any, normalize: bool = True, modality: Optional[Modality] = None) -> np.ndarray: + if modality is None: + modality = self._set_default_modality() + + self._validate_content_type(content, modality) + return self._encode(content, modality, normalize) \ No newline at end of file diff --git a/src/marqo/core/inference/inference_models/hugging_face_model.py b/src/marqo/core/inference/inference_models/hugging_face_model.py index 7b0374776..1b693054a 100644 --- a/src/marqo/core/inference/inference_models/hugging_face_model.py +++ b/src/marqo/core/inference/inference_models/hugging_face_model.py @@ -49,30 +49,35 @@ def _check_loaded_components(self): raise InternalError("Pooling function is not loaded!") def _load_necessary_components(self): + """Load the necessary components for the hf model. + + Raises: + InvalidModelPropertiesError: If the model properties are invalid or incomplete. + """ + if not (self.model_properties.name or self.model_properties.url or self.model_properties.model_location): + raise InvalidModelPropertiesError( + f"Invalid model properties for the 'hf' model. " + f"You do not have the necessary information to load the model. " + f"Check {marqo_docs.bring_your_own_model()} for more information." + ) + if self.model_properties.name: self._model, self._tokenizer = self._load_from_hugging_face_repo() - elif self.model_properties.url: + elif self.model_properties.url or ( + self.model_properties.model_location and self.model_properties.model_location.s3): self._model, self._tokenizer = self._load_from_zip_file() - elif self.model_properties.model_location: - if self.model_properties.model_location.s3: + elif self.model_properties.model_location and self.model_properties.model_location.hf: + if self.model_properties.model_location.hf.filename: self._model, self._tokenizer = self._load_from_zip_file() - elif self.model_properties.model_location.hf: - if self.model_properties.model_location.hf.filename: - self._model, self._tokenizer = self._load_from_zip_file() - else: - self._model, self._tokenizer = self._load_from_private_hugging_face_repo() else: - raise InvalidModelPropertiesError( - f"Invalid model properties for the 'hf' model. " - f"You do not have the necessary information to load the model. " - f"Check {marqo_docs.bring_your_own_model()} for more information." - ) + self._model, self._tokenizer = self._load_from_private_hugging_face_repo() else: raise InvalidModelPropertiesError( f"Invalid model properties for the 'hf' model. " f"You do not have the necessary information to load the model. " f"Check {marqo_docs.bring_your_own_model()} for more information." 
) + self._model = self._model.to(self.device) self._pooling_func = self._load_pooling_method() self._model.eval() @@ -139,7 +144,7 @@ def _load_pooling_method(self) -> Callable: elif self.model_properties.pooling_method == PoolingMethod.CLS: return self._cls_pool_func else: - raise ValueError(f"Invalid pooling method: {self.model_properties.pooling_method}") + raise InternalError(f"Invalid pooling method: {self.model_properties.pooling_method}") def encode(self, sentence: Union[str, List[str]], normalize=True, **kwargs) -> Union[FloatTensor, np.ndarray]: if isinstance(sentence, str): From 17a17e97b9e984f81ab5b3b00df6c2d21ecd8f6b Mon Sep 17 00:00:00 2001 From: Li Wan Date: Fri, 11 Oct 2024 17:08:04 +1100 Subject: [PATCH 46/63] Upgrade comments --- .../inference_models/abstract_embedding_model.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/marqo/core/inference/inference_models/abstract_embedding_model.py b/src/marqo/core/inference/inference_models/abstract_embedding_model.py index 15e881b84..1a3a70c16 100644 --- a/src/marqo/core/inference/inference_models/abstract_embedding_model.py +++ b/src/marqo/core/inference/inference_models/abstract_embedding_model.py @@ -70,8 +70,11 @@ def _encode(self, content: Any, modality: Modality, normalize: bool = True) -> n pass @abstractmethod - def _set_default_modality(self) -> Modality: - """Set the default modality for the model. + def _set_modality(self) -> Modality: + """Set the modalities for the model. + + We are inferring the modality of the content regardless of the model capabilities. For example, if user provides + an image url in the search query, we will infer the modality as image even if the model is a text model. Returns: Modality: The default modality for the model. 
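The docstring added in the hunk above pins down the key design decision: modality is inferred from the content itself, never from what the model happens to support. A minimal sketch of that contract, assuming a hypothetical infer_modality helper (not part of this patch series) and the Modality enum that PATCH 47 introduces later:

    from marqo.core.inference.enums import Modality  # added in PATCH 47

    def infer_modality(content: str) -> Modality:
        # Hypothetical helper: classify by inspecting the content alone,
        # never by consulting the model's capabilities.
        image_suffixes = (".png", ".jpg", ".jpeg", ".bmp", ".gif")
        if content.lower().split("?")[0].endswith(image_suffixes):
            return Modality.IMAGE
        return Modality.TEXT

Under this contract a text-only model handed an image URL still sees Modality.IMAGE, and is expected to reject the input during validation rather than silently embed the URL as text.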
@@ -80,8 +83,7 @@ def _set_default_modality(self) -> Modality: @abstractmethod def encode(self, content: Any, normalize: bool = True, modality: Optional[Modality] = None) -> np.ndarray: - if modality is None: - modality = self._set_default_modality() + modality = self._set_modality() self._validate_content_type(content, modality) return self._encode(content, modality, normalize) \ No newline at end of file From 3098ce63d769439722cd01edd9a9cbac267de8f8 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 14 Oct 2024 13:12:13 +1100 Subject: [PATCH 47/63] Finish abstract --- src/marqo/core/inference/enums.py | 8 +++ .../inference_models/abstract_clip_model.py | 69 +++++++++++++------ .../abstract_embedding_model.py | 17 ++--- 3 files changed, 65 insertions(+), 29 deletions(-) create mode 100644 src/marqo/core/inference/enums.py diff --git a/src/marqo/core/inference/enums.py b/src/marqo/core/inference/enums.py new file mode 100644 index 000000000..6c643f1a6 --- /dev/null +++ b/src/marqo/core/inference/enums.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class Modality(str, Enum): + TEXT = "language" + IMAGE = "image" + VIDEO = "video" + AUDIO = "audio" \ No newline at end of file diff --git a/src/marqo/core/inference/inference_models/abstract_clip_model.py b/src/marqo/core/inference/inference_models/abstract_clip_model.py index c62363533..85bcbbc99 100644 --- a/src/marqo/core/inference/inference_models/abstract_clip_model.py +++ b/src/marqo/core/inference/inference_models/abstract_clip_model.py @@ -1,13 +1,17 @@ from abc import abstractmethod +import numpy as np from PIL import UnidentifiedImageError +from marqo.core.exceptions import InternalError from marqo.core.inference.inference_models.abstract_embedding_model import AbstractEmbeddingModel from marqo.s2_inference.types import * from marqo.core.inference.inference_models.image_download import (_is_image, format_and_load_CLIP_images, format_and_load_CLIP_image) from marqo.s2_inference.logger import get_logger import torch +from marqo.core.inference.enums import Modality +from marqo.s2_inference.errors import UnsupportedModalityError logger = get_logger(__name__) @@ -42,34 +46,57 @@ def __init__(self, device: Optional[str] = None, model_properties: Optional[dict self.preprocess = None @abstractmethod - def encode_text(self, inputs: Union[str, List[str]], normalize: bool = True) -> FloatTensor: + def encode_text(self, inputs: Union[str, List[str]], normalize: bool = True) -> np.ndarray: pass @abstractmethod - def encode_image(self, inputs, normalize: bool = True, image_download_headers: dict = None) -> FloatTensor: + def encode_image(self, inputs, normalize: bool = True, image_download_headers: dict = None) -> np.ndarray: pass - def encode(self, inputs: Union[str, ImageType, List[Union[str, ImageType]]], - default: str = 'text', normalize=True, **kwargs) -> FloatTensor: - infer = kwargs.pop('infer', True) - - if infer and _is_image(inputs): - is_image = True + def _validate_and_set_modality(self, modality: Optional[Modality] = None) -> Modality: + if modality is None: + return Modality.TEXT + elif modality in [Modality.TEXT, Modality.IMAGE]: + return modality else: - if default == 'text': - is_image = False - elif default == 'image': - is_image = True - else: - raise UnidentifiedImageError(f"expected default='image' or default='text' but received {default}") - - if is_image: - logger.debug('image') - image_download_headers = kwargs.get("image_download_headers", dict()) - return self.encode_image(inputs, normalize=normalize, 
image_download_headers=image_download_headers) + raise UnidentifiedImageError( + f"The model expected modality to be one of {Modality.TEXT} " + f"or {Modality.IMAGE} but received {modality}." + ) + + def _validate_content_type(self, content: Any, modality: Modality) -> None: + """Validate if the provided content type is valid for the specific model and if it matches the modality. + + Args: + content (Any): The content to validate. + modality (Modality): The modality of the content. + + Raises: + ValueError: If the content type is not valid. + """ + + # TODO: Implement this method + pass + + + def _encode(self, content: Union[str, ImageType, List[str], List[ImageType], Tensor], + modality: Modality, normalize: bool = True) -> np.ndarray: + """Encode the given content. + + Args: + content (): The content to encode. + modality (Modality): The modality of the content. + normalize (bool): Whether to normalize the output embeddings. + + Returns: + np.ndarray: The encoded content. + """ + if modality == Modality.TEXT: + return self.encode_text(content, normalize) + elif modality == Modality.IMAGE: + return self.encode_image(content, normalize) else: - logger.debug('text') - return self.encode_text(inputs, normalize=normalize) + raise InternalError(f"Unsupported modality: {modality}") def _convert_output(self, output): if self.device == 'cpu': diff --git a/src/marqo/core/inference/inference_models/abstract_embedding_model.py b/src/marqo/core/inference/inference_models/abstract_embedding_model.py index 1a3a70c16..bef14da6e 100644 --- a/src/marqo/core/inference/inference_models/abstract_embedding_model.py +++ b/src/marqo/core/inference/inference_models/abstract_embedding_model.py @@ -2,7 +2,7 @@ from typing import Optional, Any from marqo.tensor_search.models.private_models import ModelAuth -from marqo.s2_inference.multimodal_model_load import Modality +from marqo.core.inference.enums import Modality import numpy as np @@ -70,20 +70,21 @@ def _encode(self, content: Any, modality: Modality, normalize: bool = True) -> n pass @abstractmethod - def _set_modality(self) -> Modality: - """Set the modalities for the model. + def _validate_and_set_modality(self, modality) -> Modality: + """Validate the modalities for the model. We are inferring the modality of the content regardless of the model capabilities. For example, if user provides an image url in the search query, we will infer the modality as image even if the model is a text model. Returns: - Modality: The default modality for the model. + Modality: The modalities for the model content. + + Raises: + UnsupportedModalityError: If the model does not support the inferred modality other than text. 
""" pass - @abstractmethod - def encode(self, content: Any, normalize: bool = True, modality: Optional[Modality] = None) -> np.ndarray: - modality = self._set_modality() - + def encode(self, content: Any, normalize: bool = True, modality: Optional[Modality] = None, **kwargs) -> np.ndarray: + modality = self._validate_and_set_modality(modality) self._validate_content_type(content, modality) return self._encode(content, modality, normalize) \ No newline at end of file From f7c5d472171a1a03b1e5a583b4583c769d2f0120 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 21 Oct 2024 15:03:59 +1100 Subject: [PATCH 48/63] Add private model tests --- .../inference_models/abstract_clip_model.py | 38 ++-- .../abstract_embedding_model.py | 46 +---- .../inference/test_private_model_loading.py | 172 ++++++++++++++++++ 3 files changed, 197 insertions(+), 59 deletions(-) create mode 100644 tests/core/inference/test_private_model_loading.py diff --git a/src/marqo/core/inference/inference_models/abstract_clip_model.py b/src/marqo/core/inference/inference_models/abstract_clip_model.py index 85bcbbc99..06662bf11 100644 --- a/src/marqo/core/inference/inference_models/abstract_clip_model.py +++ b/src/marqo/core/inference/inference_models/abstract_clip_model.py @@ -78,25 +78,27 @@ def _validate_content_type(self, content: Any, modality: Modality) -> None: # TODO: Implement this method pass - - def _encode(self, content: Union[str, ImageType, List[str], List[ImageType], Tensor], - modality: Modality, normalize: bool = True) -> np.ndarray: - """Encode the given content. - - Args: - content (): The content to encode. - modality (Modality): The modality of the content. - normalize (bool): Whether to normalize the output embeddings. - - Returns: - np.ndarray: The encoded content. - """ - if modality == Modality.TEXT: - return self.encode_text(content, normalize) - elif modality == Modality.IMAGE: - return self.encode_image(content, normalize) + def encode(self, inputs: Union[str, ImageType, List[Union[str, ImageType]]], + default: str = 'text', normalize=True, **kwargs) -> np.ndarray: + infer = kwargs.pop('infer', True) + if infer and _is_image(inputs): + is_image = True + else: + is_image = False + if default == 'text': + is_image = False + elif default == 'image': + is_image = True + else: + raise UnidentifiedImageError(f"expected default='image' or default='text' but received {default}") + + if is_image: + logger.debug('image') + image_download_headers = kwargs.get("image_download_headers", dict()) + return self.encode_image(inputs, normalize=normalize, image_download_headers=image_download_headers) else: - raise InternalError(f"Unsupported modality: {modality}") + logger.debug('text') + return self.encode_text(inputs, normalize=normalize) def _convert_output(self, output): if self.device == 'cpu': diff --git a/src/marqo/core/inference/inference_models/abstract_embedding_model.py b/src/marqo/core/inference/inference_models/abstract_embedding_model.py index bef14da6e..2339f4e5a 100644 --- a/src/marqo/core/inference/inference_models/abstract_embedding_model.py +++ b/src/marqo/core/inference/inference_models/abstract_embedding_model.py @@ -1,9 +1,7 @@ from abc import ABC, abstractmethod -from typing import Optional, Any -from marqo.tensor_search.models.private_models import ModelAuth +from typing import Optional -from marqo.core.inference.enums import Modality -import numpy as np +from marqo.tensor_search.models.private_models import ModelAuth class AbstractEmbeddingModel(ABC): @@ -51,40 +49,6 @@ def 
_check_loaded_components(self):
         pass
 
     @abstractmethod
-    def _validate_content_type(self, content: Any, modality: Modality):
-        """Validate if the provided content type is valid for the specific model and if it matches the modality.
-
-        Raise:
-            ValueError: If the content type is not valid.
-        """
-        pass
-
-    @abstractmethod
-    def _encode(self, content: Any, modality: Modality, normalize: bool = True) -> np.ndarray:
-        """Encode the given content.
-
-        Args:
-            content (Any): The content to encode.
-            normalize (bool): Whether to normalize the output or not.
-        """
-        pass
-
-    @abstractmethod
-    def _validate_and_set_modality(self, modality) -> Modality:
-        """Validate the modality for the model.
-
-        We are inferring the modality of the content regardless of the model capabilities. For example, if user provides
-        an image url in the search query, we will infer the modality as image even if the model is a text model.
-
-        Returns:
-            Modality: The modality to use for the content.
-
-        Raises:
-            UnsupportedModalityError: If the model does not support the inferred non-text modality.
-        """
-        pass
-
-    def encode(self, content: Any, normalize: bool = True, modality: Optional[Modality] = None, **kwargs) -> np.ndarray:
-        modality = self._validate_and_set_modality(modality)
-        self._validate_content_type(content, modality)
-        return self._encode(content, modality, normalize)
\ No newline at end of file
+    def encode(self):
+        """Encode the input data."""
+        pass
\ No newline at end of file
diff --git a/tests/core/inference/test_private_model_loading.py b/tests/core/inference/test_private_model_loading.py
new file mode 100644
index 000000000..cb24a250b
--- /dev/null
+++ b/tests/core/inference/test_private_model_loading.py
@@ -0,0 +1,172 @@
+import os
+from typing import Dict
+
+from marqo.core.exceptions import IndexNotFoundError
+from marqo.core.models.add_docs_params import AddDocsParams
+from marqo.tensor_search.api import create_index
+from marqo.tensor_search.models.index_settings import IndexSettings
+from tests.marqo_test import MarqoTestCase, TestImageUrls
+
+
+class TestPrivateModelLoading(MarqoTestCase):
+    """A test class for loading private models end to end in Marqo."""
+
+    @classmethod
+    def setUpClass(cls):
+        super().setUpClass()
+        cls.aws_access_key_id = os.getenv("PRIVATE_MODEL_TESTS_AWS_ACCESS_KEY_ID", None)
+        cls.aws_secret_access_key = os.getenv("PRIVATE_MODEL_TESTS_AWS_SECRET_ACCESS_KEY", None)
+        cls.hf_token = os.getenv("PRIVATE_MODEL_TESTS_HF_TOKEN", None)
+
+        if any([cls.aws_access_key_id is None, cls.aws_secret_access_key is None, cls.hf_token is None]):
+            raise ValueError("Please set the PRIVATE_MODEL_TESTS_AWS_ACCESS_KEY_ID, PRIVATE_MODEL_TESTS_AWS_SECRET_ACCESS_KEY, "
+                             "and PRIVATE_MODEL_TESTS_HF_TOKEN environment variables to run this test.")
+
+        cls.index_name = "test_index_private_model_loading"
+
+    def setUp(self):
+        super().setUp()
+        try:
+            self.index_management.delete_index_by_name(self.index_name)
+        except IndexNotFoundError:
+            pass
+
+    def tearDown(self):
+        super().tearDown()
+        try:
+            self.index_management.delete_index_by_name(self.index_name)
+        except IndexNotFoundError:
+            pass
+
+    def _help_test_index(self, model: str, model_properties: Dict):
+        index_settings = IndexSettings(
+            model=model,
+            modelProperties=model_properties,
+            type="unstructured",
+            treatUrlsAndPointersAsMedia=True
+        )
+        create_index(self.index_name, index_settings, self.config)
+
+    def test_load_private_hf_model_from_a_private_zip_file_on_s3(self):
+        model = "private-e5-zip-on-s3"
+        model_properties = {
+            "dimensions": 768,
+            "type": "hf",
+            "modelLocation": {
"s3": { + "Bucket": "marqo-opensource-private-model-tests", + "Key": "private-e5-model.zip" + }, + "auth_required": True + } + } + self._help_test_index(model, model_properties) + add_docs_params = AddDocsParams( + index_name=self.index_name, + docs=[{ + "id": "1", + "text": "This is a test document." + }], + tensor_fields = ["text"], + model_auth={ + "s3": { + "aws_access_key_id": self.aws_access_key_id, + "aws_secret_access_key": self.aws_secret_access_key + } + } + ) + + res = self.add_documents(self.config, add_docs_params= add_docs_params) + self.assertEqual(res.errors, False) + + def test_load_private_hf_model_from_a_private_hf_repo(self): + model = "private-e5-repo-on-hf" + model_properties = { + "dimensions": 768, + "type": "hf", + "modelLocation": { + "hf": { + "repoId": "Marqo/e5-base-v2-private-test" + }, + "auth_required": True + } + } + self._help_test_index(model, model_properties) + add_docs_params = AddDocsParams( + index_name=self.index_name, + docs=[{ + "id": "1", + "text": "This is a test document." + }], + tensor_fields = ["text"], + model_auth={ + "hf": {"token": self.hf_token} + } + ) + + res = self.add_documents(self.config, add_docs_params= add_docs_params) + self.assertEqual(res.errors, False) + + def test_load_private_open_clip_model_from_a_private_ckpt_on_s3(self): + model = "private-marqo-fashion-siglip-model-on-s3" + model_properties = { + "dimensions": 768, + "name": "ViT-B-16-SigLIP", + "type": "open_clip", + "imagePreprocessor": "SigLIP", + "modelLocation": { + "s3": { + "Bucket": "marqo-opensource-private-model-tests", + "Key": "private-marqo-fashion-siglip-ckpt.bin" + }, + "auth_required": True + } + } + self._help_test_index(model, model_properties) + add_docs_params = AddDocsParams( + index_name=self.index_name, + docs=[{ + "id": "1", + "text": "This is a test document.", + "image": TestImageUrls.IMAGE2 + }], + tensor_fields=["text", "image"], + model_auth={ + "s3": { + "aws_access_key_id": self.aws_access_key_id, + "aws_secret_access_key": self.aws_secret_access_key + } + } + ) + res = self.add_documents(self.config, add_docs_params=add_docs_params) + self.assertEqual(res.status_code, 200) + + def test_load_private_open_clip_model_from_a_private_ckpt_on_hf(self): + model = "private-marqo-fashion-siglip-model-ckpt-on-hf" + model_properties = { + "dimensions": 768, + "name": "ViT-B-16", + "type": "open_clip", + "modelLocation": { + "hf": { + "repoId": "Marqo/private-ecommerce-embeddings-B", + "filename": "open_clip_pytorch_model.bin" + }, + "auth_required": True + } + } + self._help_test_index(model, model_properties) + add_docs_params = AddDocsParams( + index_name=self.index_name, + docs=[{ + "id": "1", + "text": "This is a test document.", + "image": TestImageUrls.IMAGE2 + }], + tensor_fields=["text", "image"], + model_auth={ + "hf": {"token": self.hf_token} + } + ) + res = self.add_documents(self.config, add_docs_params=add_docs_params) + self.assertEqual(res.status_code, 200) \ No newline at end of file From f8fd031bd962ccb098eb0fc4c49d6e43380fecea Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 21 Oct 2024 15:38:15 +1100 Subject: [PATCH 49/63] Add private model tests --- .github/workflows/unit_test_200gb_CI.yml | 4 +++ .../inference/test_private_model_loading.py | 29 +++++++++++-------- tests/marqo_test.py | 2 +- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/.github/workflows/unit_test_200gb_CI.yml b/.github/workflows/unit_test_200gb_CI.yml index 785a2af39..7b08028bf 100644 --- a/.github/workflows/unit_test_200gb_CI.yml +++ 
b/.github/workflows/unit_test_200gb_CI.yml @@ -155,6 +155,10 @@ jobs: export VESPA_DOCUMENT_URL=http://localhost:8080 export VESPA_QUERY_URL=http://localhost:8080 + export PRIVATE_MODEL_TESTS_AWS_ACCESS_KEY_ID=${{ secrets.PRIVATE_MODEL_TESTS_AWS_ACCESS_KEY_ID }} + export PRIVATE_MODEL_TESTS_AWS_SECRET_ACCESS_KEY=${{ secrets.PRIVATE_MODEL_TESTS_AWS_SECRET_ACCESS_KEY }} + export PRIVATE_MODEL_TESTS_HF_TOKEN=${{ secrets.PRIVATE_MODEL_TESTS_HF_TOKEN }} + cd marqo export PYTHONPATH="./tests:./src:." pytest --ignore=tests/test_documentation.py --durations=100 --cov=src --cov-branch --cov-context=test --cov-report=html:cov_html --cov-report=lcov:lcov.info tests diff --git a/tests/core/inference/test_private_model_loading.py b/tests/core/inference/test_private_model_loading.py index cb24a250b..f26ac805e 100644 --- a/tests/core/inference/test_private_model_loading.py +++ b/tests/core/inference/test_private_model_loading.py @@ -77,7 +77,8 @@ def test_load_private_hf_model_from_a_private_zip_file_on_s3(self): ) res = self.add_documents(self.config, add_docs_params= add_docs_params) - self.assertEqual(res.errors, False) + self.assertEqual(False, res.errors) + self.assertEqual(self.monitoring.get_index_stats_by_name(self.index_name).number_of_documents, 1) def test_load_private_hf_model_from_a_private_hf_repo(self): model = "private-e5-repo-on-hf" @@ -105,19 +106,19 @@ def test_load_private_hf_model_from_a_private_hf_repo(self): ) res = self.add_documents(self.config, add_docs_params= add_docs_params) - self.assertEqual(res.errors, False) + self.assertEqual(False, res.errors) + self.assertEqual(self.monitoring.get_index_stats_by_name(self.index_name).number_of_documents, 1) def test_load_private_open_clip_model_from_a_private_ckpt_on_s3(self): - model = "private-marqo-fashion-siglip-model-on-s3" + model = "private-marqo-fashion-clip-model-ckpt-on-s3" model_properties = { - "dimensions": 768, - "name": "ViT-B-16-SigLIP", + "dimensions": 512, + "name": "ViT-B-16", "type": "open_clip", - "imagePreprocessor": "SigLIP", "modelLocation": { "s3": { "Bucket": "marqo-opensource-private-model-tests", - "Key": "private-marqo-fashion-siglip-ckpt.bin" + "Key": "private-fashion-clip-ckpt.bin" }, "auth_required": True } @@ -128,7 +129,7 @@ def test_load_private_open_clip_model_from_a_private_ckpt_on_s3(self): docs=[{ "id": "1", "text": "This is a test document.", - "image": TestImageUrls.IMAGE2 + "image": str(TestImageUrls.IMAGE2) }], tensor_fields=["text", "image"], model_auth={ @@ -139,13 +140,15 @@ def test_load_private_open_clip_model_from_a_private_ckpt_on_s3(self): } ) res = self.add_documents(self.config, add_docs_params=add_docs_params) - self.assertEqual(res.status_code, 200) + self.assertEqual(False, res.errors) + self.assertEqual(self.monitoring.get_index_stats_by_name(self.index_name).number_of_documents, 1) + self.assertEqual(self.monitoring.get_index_stats_by_name(self.index_name).number_of_vectors, 2) def test_load_private_open_clip_model_from_a_private_ckpt_on_hf(self): model = "private-marqo-fashion-siglip-model-ckpt-on-hf" model_properties = { "dimensions": 768, - "name": "ViT-B-16", + "name": "ViT-B-16-SigLIP", "type": "open_clip", "modelLocation": { "hf": { @@ -161,7 +164,7 @@ def test_load_private_open_clip_model_from_a_private_ckpt_on_hf(self): docs=[{ "id": "1", "text": "This is a test document.", - "image": TestImageUrls.IMAGE2 + "image": str(TestImageUrls.IMAGE2) }], tensor_fields=["text", "image"], model_auth={ @@ -169,4 +172,6 @@ def 
test_load_private_open_clip_model_from_a_private_ckpt_on_hf(self): } ) res = self.add_documents(self.config, add_docs_params=add_docs_params) - self.assertEqual(res.status_code, 200) \ No newline at end of file + self.assertEqual(False, res.errors) + self.assertEqual(self.monitoring.get_index_stats_by_name(self.index_name).number_of_documents, 1) + self.assertEqual(self.monitoring.get_index_stats_by_name(self.index_name).number_of_vectors, 2) \ No newline at end of file diff --git a/tests/marqo_test.py b/tests/marqo_test.py index 2eaae9f35..13448c63f 100644 --- a/tests/marqo_test.py +++ b/tests/marqo_test.py @@ -23,7 +23,7 @@ from marqo.vespa.vespa_client import VespaClient -class TestImageUrls(Enum): +class TestImageUrls(str, Enum): __test__ = False # Prevent pytest from collecting this class as a test IMAGE0 = 'https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image0.jpg' IMAGE1 = 'https://raw.githubusercontent.com/marqo-ai/marqo/mainline/examples/ImageSearchGuide/data/image1.jpg' From 10f73dcc29db1db926a7c54183714756d9443e8a Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 21 Oct 2024 15:43:48 +1100 Subject: [PATCH 50/63] Add poolingMethod: mean to bge models --- src/marqo/s2_inference/model_registry.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/marqo/s2_inference/model_registry.py b/src/marqo/s2_inference/model_registry.py index dc06d74d1..80019fe30 100644 --- a/src/marqo/s2_inference/model_registry.py +++ b/src/marqo/s2_inference/model_registry.py @@ -806,12 +806,14 @@ def _get_hf_properties() -> Dict: "tokens": 512, "type": "hf", "text_query_prefix": "Represent this sentence for searching relevant passages: ", + "poolingMethod": "mean", "notes": ""}, "hf/bge-base-en-v1.5": {"name": 'BAAI/bge-base-en-v1.5', "dimensions": 768, "tokens": 512, "type": "hf", + "poolingMethod": "mean", "text_query_prefix": "Represent this sentence for searching relevant passages: ", "notes": ""}, "hf/bge-large-en-v1.5": @@ -819,6 +821,7 @@ def _get_hf_properties() -> Dict: "dimensions": 1024, "tokens": 512, "type": "hf", + "poolingMethod": "mean", "text_query_prefix": "Represent this sentence for searching relevant passages: ", "notes": ""}, "hf/bge-small-zh-v1.5": @@ -826,6 +829,7 @@ def _get_hf_properties() -> Dict: "dimensions": 512, "tokens": 512, "type": "hf", + "poolingMethod": "mean", "text_query_prefix": "为这个句子生成表示以用于检索相关文章:", "notes": ""}, "hf/bge-base-zh-v1.5": @@ -833,6 +837,7 @@ def _get_hf_properties() -> Dict: "dimensions": 768, "tokens": 512, "type": "hf", + "poolingMethod": "mean", "text_query_prefix": "为这个句子生成表示以用于检索相关文章:", "notes": ""}, "hf/bge-large-zh-v1.5": @@ -840,6 +845,7 @@ def _get_hf_properties() -> Dict: "dimensions": 1024, "tokens": 512, "type": "hf", + "poolingMethod": "mean", "text_query_prefix": "为这个句子生成表示以用于检索相关文章:", "notes": ""}, # Uncomment when fix is implemented. 
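The `poolingMethod: mean` entries added above select mean pooling: the final hidden states are averaged over the non-padding token positions, in contrast to CLS pooling, which keeps only the first token's hidden state. A minimal sketch of that computation, assuming Hugging Face-style `last_hidden_state` and `attention_mask` tensors (an illustration of the technique, not Marqo's exact implementation):

    import torch

    def mean_pool(last_hidden_state: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
        # last_hidden_state: (batch, seq_len, hidden); attention_mask: (batch, seq_len)
        mask = attention_mask.unsqueeze(-1).type_as(last_hidden_state)  # (batch, seq_len, 1)
        summed = (last_hidden_state * mask).sum(dim=1)  # zero out padding positions, then sum
        counts = mask.sum(dim=1).clamp(min=1e-9)        # tokens per sequence, guarded against zero
        return summed / counts                          # (batch, hidden)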
From 5c0f0dd4a14fa682ba16f1970119bd62c35a177c Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 21 Oct 2024 15:52:14 +1100 Subject: [PATCH 51/63] Remove unused code --- src/marqo/core/inference/enums.py | 8 ------ .../inference_models/abstract_clip_model.py | 28 ------------------- 2 files changed, 36 deletions(-) delete mode 100644 src/marqo/core/inference/enums.py diff --git a/src/marqo/core/inference/enums.py b/src/marqo/core/inference/enums.py deleted file mode 100644 index 6c643f1a6..000000000 --- a/src/marqo/core/inference/enums.py +++ /dev/null @@ -1,8 +0,0 @@ -from enum import Enum - - -class Modality(str, Enum): - TEXT = "language" - IMAGE = "image" - VIDEO = "video" - AUDIO = "audio" \ No newline at end of file diff --git a/src/marqo/core/inference/inference_models/abstract_clip_model.py b/src/marqo/core/inference/inference_models/abstract_clip_model.py index 06662bf11..047aeba50 100644 --- a/src/marqo/core/inference/inference_models/abstract_clip_model.py +++ b/src/marqo/core/inference/inference_models/abstract_clip_model.py @@ -3,15 +3,12 @@ import numpy as np from PIL import UnidentifiedImageError -from marqo.core.exceptions import InternalError from marqo.core.inference.inference_models.abstract_embedding_model import AbstractEmbeddingModel from marqo.s2_inference.types import * from marqo.core.inference.inference_models.image_download import (_is_image, format_and_load_CLIP_images, format_and_load_CLIP_image) from marqo.s2_inference.logger import get_logger import torch -from marqo.core.inference.enums import Modality -from marqo.s2_inference.errors import UnsupportedModalityError logger = get_logger(__name__) @@ -53,31 +50,6 @@ def encode_text(self, inputs: Union[str, List[str]], normalize: bool = True) -> def encode_image(self, inputs, normalize: bool = True, image_download_headers: dict = None) -> np.ndarray: pass - def _validate_and_set_modality(self, modality: Optional[Modality] = None) -> Modality: - if modality is None: - return Modality.TEXT - elif modality in [Modality.TEXT, Modality.IMAGE]: - return modality - else: - raise UnidentifiedImageError( - f"The model expected modality to be one of {Modality.TEXT} " - f"or {Modality.IMAGE} but received {modality}." - ) - - def _validate_content_type(self, content: Any, modality: Modality) -> None: - """Validate if the provided content type is valid for the specific model and if it matches the modality. - - Args: - content (Any): The content to validate. - modality (Modality): The modality of the content. - - Raises: - ValueError: If the content type is not valid. 
- """ - - # TODO: Implement this method - pass - def encode(self, inputs: Union[str, ImageType, List[Union[str, ImageType]]], default: str = 'text', normalize=True, **kwargs) -> np.ndarray: infer = kwargs.pop('infer', True) From 213a02c7691f35dbdf49c058d831015cbc0b3d20 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 21 Oct 2024 16:05:27 +1100 Subject: [PATCH 52/63] Add model_auth regression fix --- .../inference_models/abstract_clip_model.py | 9 +++++---- .../inference/inference_models/open_clip_model.py | 14 ++++++-------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/marqo/core/inference/inference_models/abstract_clip_model.py b/src/marqo/core/inference/inference_models/abstract_clip_model.py index 047aeba50..09e47b7d3 100644 --- a/src/marqo/core/inference/inference_models/abstract_clip_model.py +++ b/src/marqo/core/inference/inference_models/abstract_clip_model.py @@ -1,14 +1,15 @@ from abc import abstractmethod import numpy as np +import torch from PIL import UnidentifiedImageError from marqo.core.inference.inference_models.abstract_embedding_model import AbstractEmbeddingModel -from marqo.s2_inference.types import * from marqo.core.inference.inference_models.image_download import (_is_image, format_and_load_CLIP_images, format_and_load_CLIP_image) from marqo.s2_inference.logger import get_logger -import torch +from marqo.s2_inference.types import * +from marqo.tensor_search.models.private_models import ModelAuth logger = get_logger(__name__) @@ -26,14 +27,14 @@ class AbstractCLIPModel(AbstractEmbeddingModel): """ def __init__(self, device: Optional[str] = None, model_properties: Optional[dict] = None, - model_auth: Optional[dict] = None): + model_auth: Optional[ModelAuth] = None): """Instantiate the abstract CLIP model. Args: device (str): The device to load the model on, typically 'cpu' or 'cuda'. model_properties (dict): A dictionary containing additional properties or configurations specific to the model. Defaults to an empty dictionary if not provided. - model_auth (dict): The authentication information for the model. Defaults to `None` if not provided + model_auth (ModelAuth): The authentication information for the model. 
Defaults to `None` if not provided.
         """
         super().__init__(model_properties, device, model_auth)
 
diff --git a/src/marqo/core/inference/inference_models/open_clip_model.py b/src/marqo/core/inference/inference_models/open_clip_model.py
index 62ced98f6..59f67bb4c 100644
--- a/src/marqo/core/inference/inference_models/open_clip_model.py
+++ b/src/marqo/core/inference/inference_models/open_clip_model.py
@@ -4,19 +4,19 @@
 import torch
 from open_clip.pretrained import _pcfg, _slpcfg, _apcfg
 from open_clip.transform import image_transform_v2, PreprocessCfg, merge_preprocess_dict
+from pydantic import ValidationError
 from torchvision.transforms import Compose
 
 from marqo import marqo_docs
 from marqo.core.inference.inference_models.abstract_clip_model import AbstractCLIPModel
+from marqo.core.inference.inference_models.hf_tokenizer import HFTokenizer
 from marqo.core.inference.inference_models.open_clip_model_properties import OpenCLIPModelProperties, ImagePreprocessor
+from marqo.core.inference.model_download import download_model
 from marqo.s2_inference.configs import ModelCache
 from marqo.s2_inference.errors import InvalidModelPropertiesError
 from marqo.s2_inference.logger import get_logger
-from marqo.core.inference.inference_models.hf_tokenizer import HFTokenizer
-from marqo.core.inference.model_download import download_model
 from marqo.s2_inference.types import *
-from marqo.tensor_search.models.private_models import ModelLocation
-from pydantic import ValidationError
+from marqo.tensor_search.models.private_models import ModelAuth, ModelLocation
 
 logger = get_logger(__name__)
 
@@ -29,14 +29,12 @@ def __init__(
         self,
         device: Optional[str] = None,
         model_properties: Optional[Dict] = None,
-        model_auth: Optional[Dict] = None,
+        model_auth: Optional[ModelAuth] = None,
     ) -> None:
         super().__init__(device, model_properties, model_auth)
 
         self.model_properties = self._build_model_properties(model_properties)
-
-        # model_auth gets passed through add_docs and search requests:
         self.preprocess_config = None
 
     def _build_model_properties(self, model_properties: dict) -> OpenCLIPModelProperties:
@@ -233,7 +231,7 @@ def _download_from_repo(self):
         download_model_params = {"repo_location": model_location}
 
         if model_location.auth_required:
-            download_model_params['auth'] = self.model_properties.model_auth
+            download_model_params['auth'] = self.model_auth
 
         model_file_path = download_model(**download_model_params)
 
         if model_file_path is None or model_file_path == '':
From c27d567e2ba49ea003758d3c95018dd3a93077c3 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Mon, 21 Oct 2024 18:30:26 +1100
Subject: [PATCH 53/63] Add back the localpath for open_clip model properties
---
 .../inference_models/open_clip_model.py       | 13 +++++-----
 .../open_clip_model_properties.py             | 18 +++++++++++--
 .../inference}/test_marqo_fashion_clip.py     |  0
 .../inference}/test_open_clip_model_load.py   | 25 +++++++++++++++++++
 .../s2_inference/open_clip_models/__init__.py |  0
 5 files changed, 47 insertions(+), 9 deletions(-)
 rename tests/{s2_inference/open_clip_models => core/inference}/test_marqo_fashion_clip.py (100%)
 rename tests/{s2_inference/open_clip_models => core/inference}/test_open_clip_model_load.py (88%)
 delete mode 100644 tests/s2_inference/open_clip_models/__init__.py
diff --git a/src/marqo/core/inference/inference_models/open_clip_model.py b/src/marqo/core/inference/inference_models/open_clip_model.py
index 59f67bb4c..460a98398 100644
--- a/src/marqo/core/inference/inference_models/open_clip_model.py
+++ 
b/src/marqo/core/inference/inference_models/open_clip_model.py @@ -12,6 +12,7 @@ from marqo.core.inference.inference_models.hf_tokenizer import HFTokenizer from marqo.core.inference.inference_models.open_clip_model_properties import OpenCLIPModelProperties, ImagePreprocessor from marqo.core.inference.model_download import download_model +from marqo.exceptions import InternalError from marqo.s2_inference.configs import ModelCache from marqo.s2_inference.errors import InvalidModelPropertiesError from marqo.s2_inference.logger import get_logger @@ -47,7 +48,8 @@ def _build_model_properties(self, model_properties: dict) -> OpenCLIPModelProper def _load_necessary_components(self) -> None: """Load the open_clip model and tokenizer.""" - if self.model_properties.url is not None or self.model_properties.model_location is not None: + if self.model_properties.url is not None or self.model_properties.model_location is not None or \ + self.model_properties.localpath is not None: self.model, self.preprocess = self._load_model_and_image_preprocessor_from_checkpoint() self.tokenizer = self._load_tokenizer_from_checkpoint() elif self.model_properties.name.startswith(HF_HUB_PREFIX): @@ -108,17 +110,14 @@ def _load_model_and_image_preprocessor_from_checkpoint(self) -> Tuple[torch.nn.M The checkpoint file can be provided through a URL or a model_location object. """ # Load the image preprocessor - if self.model_properties.url and self.model_properties.model_location: - raise InvalidModelPropertiesError( - "Only one of url, model_location can be specified in 'model_properties' " - ) + if self.model_properties.localpath: + self.model_path = self.model_properties.localpath elif self.model_properties.model_location: self.model_path = self._download_from_repo() elif self.model_properties.url: self.model_path = download_model(url=self.model_properties.url) else: - raise ValueError("The 'url' or 'model_location' is required in 'model_properties' " - "when loading a custom open_clip model through a URL or a model_location object") + raise InternalError("One of 'localpath', 'model_location', or 'url' must be provided to load the model.") logger.info(f"The name of the custom clip model is {self.model_properties.name}. We use open_clip loader") diff --git a/src/marqo/core/inference/inference_models/open_clip_model_properties.py b/src/marqo/core/inference/inference_models/open_clip_model_properties.py index 53200a47a..e034f174f 100644 --- a/src/marqo/core/inference/inference_models/open_clip_model_properties.py +++ b/src/marqo/core/inference/inference_models/open_clip_model_properties.py @@ -2,7 +2,7 @@ from enum import Enum from typing import Optional, List -from pydantic import Field +from pydantic import Field, root_validator from marqo.base_model import MarqoBaseModel from marqo.tensor_search.models.private_models import ModelLocation, ModelAuth @@ -33,6 +33,7 @@ class OpenCLIPModelProperties(MarqoBaseModel): jit: A boolean indicating whether the model is JIT compiled. precision: The precision of the model. It should be either 'fp32' or 'fp16'. url: The URL of the model checkpoint. It is optional. + localpath: The local path of the model checkpoint. It is optional. model_location: The location of the model. It is optional. tokenizer: The name of the _tokenizer. It is optional. model_auth: The authentication information for the model. It is optional. 
@@ -52,6 +53,7 @@ class OpenCLIPModelProperties(MarqoBaseModel): jit: bool = False precision: Precision = Precision.FP32 url: Optional[str] = None + localpath: Optional[str] = None model_location: Optional[ModelLocation] = Field(default=None, alias="modelLocation") tokenizer: Optional[str] = None model_auth: Optional[ModelAuth] = Field(default=None, alias="modelAuth") @@ -60,4 +62,16 @@ class OpenCLIPModelProperties(MarqoBaseModel): std: Optional[List[float]] = None size: Optional[int] = None note: Optional[str] = None - pretrained: Optional[str] = None \ No newline at end of file + pretrained: Optional[str] = None + + @root_validator(pre=False, skip_on_failure=True) + def _validate_custom_loading_fields(cls, values): + url = values.get("url") + localpath = values.get("localpath") + model_location = values.get("model_location") + + provided_fields = sum(1 for field in [url, localpath, model_location] if field is not None) + if provided_fields > 1: + raise ValueError("Only one of 'url', 'localpath', or 'model_location' should be provided.") + + return values diff --git a/tests/s2_inference/open_clip_models/test_marqo_fashion_clip.py b/tests/core/inference/test_marqo_fashion_clip.py similarity index 100% rename from tests/s2_inference/open_clip_models/test_marqo_fashion_clip.py rename to tests/core/inference/test_marqo_fashion_clip.py diff --git a/tests/s2_inference/open_clip_models/test_open_clip_model_load.py b/tests/core/inference/test_open_clip_model_load.py similarity index 88% rename from tests/s2_inference/open_clip_models/test_open_clip_model_load.py rename to tests/core/inference/test_open_clip_model_load.py index 621322d9b..6609c2a23 100644 --- a/tests/s2_inference/open_clip_models/test_open_clip_model_load.py +++ b/tests/core/inference/test_open_clip_model_load.py @@ -178,3 +178,28 @@ def test_load_OpenCLIPModel_unsupported_image_preprocessor(self): model.load() self.assertIn("permitted: 'SigLIP', 'OpenAI', 'OpenCLIP', 'CLIPA'", str(context.exception)) + + def test_load_OpenCLIPModel_from_local_path(self): + """Test loading an OpenCLIP model from a local path.""" + model_tag = "my_test_model" + model_properties = { + "name": "ViT-B-32", + "localpath": "/path/to/my_test_model.pt", + "dimension": 512, + "type": "open_clip" + } + with patch("marqo.core.inference.inference_models.open_clip_model.open_clip.create_model", return_value=MagicMock()) \ + as mock_create_model: + with patch("marqo.core.inference.inference_models.open_clip_model.open_clip.get_tokenizer", return_value=MagicMock()) \ + as mock_tokenizer: + with patch.object(MagicMock(), 'eval', return_value=None) as mock_eval: + model = OPEN_CLIP(model_properties=model_properties, device="cpu") + model.load() + mock_create_model.assert_called_once_with( + model_name="ViT-B-32", + jit=False, + pretrained="/path/to/my_test_model.pt", + precision="fp32", device="cpu", + cache_dir=ModelCache.clip_cache_path + ) + mock_tokenizer.assert_called_once_with("ViT-B-32") \ No newline at end of file diff --git a/tests/s2_inference/open_clip_models/__init__.py b/tests/s2_inference/open_clip_models/__init__.py deleted file mode 100644 index e69de29bb..000000000 From 08331201cf7f28f92231624d36c91ea15ae414a1 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 21 Oct 2024 18:36:21 +1100 Subject: [PATCH 54/63] Add validation for dimensions --- .../inference/inference_models/hugging_face_model_properties.py | 2 ++ .../inference/inference_models/open_clip_model_properties.py | 1 + 2 files changed, 3 insertions(+) diff --git 
a/src/marqo/core/inference/inference_models/hugging_face_model_properties.py b/src/marqo/core/inference/inference_models/hugging_face_model_properties.py index 925a251c6..226b9df03 100644 --- a/src/marqo/core/inference/inference_models/hugging_face_model_properties.py +++ b/src/marqo/core/inference/inference_models/hugging_face_model_properties.py @@ -31,6 +31,7 @@ class HuggingFaceModelProperties(MarqoBaseModel): token: The token length of the model. It is default to 128. type: The type of the model. It should be "hf". url: The URL of the model checkpoint. It is optional. + dimensions: The dimensions of the model. model_location: The location of the model. It is optional. model_auth: The authentication information for the model. It is optional. note: A note about the model. It is optional. @@ -39,6 +40,7 @@ class HuggingFaceModelProperties(MarqoBaseModel): name: Optional[str] = None token: int = 128 type: str + dimensions: int = Field(..., ge=1) url: Optional[str] = None model_location: Optional[ModelLocation] = Field(default=None, alias="modelLocation") model_auth: Optional[ModelAuth] = Field(default=None, alias="modelAuth") diff --git a/src/marqo/core/inference/inference_models/open_clip_model_properties.py b/src/marqo/core/inference/inference_models/open_clip_model_properties.py index e034f174f..a88ddb1b4 100644 --- a/src/marqo/core/inference/inference_models/open_clip_model_properties.py +++ b/src/marqo/core/inference/inference_models/open_clip_model_properties.py @@ -50,6 +50,7 @@ class OpenCLIPModelProperties(MarqoBaseModel): """ name: str type: str + dimensions: int = Field(..., ge=1) jit: bool = False precision: Precision = Precision.FP32 url: Optional[str] = None From e1f3e33a4f5c27d67b43d071e0b9cd2fdad6e971 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 21 Oct 2024 18:56:04 +1100 Subject: [PATCH 55/63] Add os.path.exists tests --- .../inference_models/open_clip_model.py | 9 +++- .../inference/test_open_clip_model_load.py | 44 ++++++++++++------- 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/src/marqo/core/inference/inference_models/open_clip_model.py b/src/marqo/core/inference/inference_models/open_clip_model.py index 460a98398..806551abb 100644 --- a/src/marqo/core/inference/inference_models/open_clip_model.py +++ b/src/marqo/core/inference/inference_models/open_clip_model.py @@ -111,7 +111,14 @@ def _load_model_and_image_preprocessor_from_checkpoint(self) -> Tuple[torch.nn.M """ # Load the image preprocessor if self.model_properties.localpath: - self.model_path = self.model_properties.localpath + if os.path.exists(self.model_properties.localpath): + self.model_path = self.model_properties.localpath + else: + raise InvalidModelPropertiesError( + f"The localpath '{self.model_properties.localpath}' does not exist. " + f"Please provide a valid localpath to load the model. If you are running Marqo in a container, " + f"make sure the localpath is mounted correctly." 
+ ) elif self.model_properties.model_location: self.model_path = self._download_from_repo() elif self.model_properties.url: diff --git a/tests/core/inference/test_open_clip_model_load.py b/tests/core/inference/test_open_clip_model_load.py index 6609c2a23..d27c6dc4c 100644 --- a/tests/core/inference/test_open_clip_model_load.py +++ b/tests/core/inference/test_open_clip_model_load.py @@ -1,6 +1,8 @@ from unittest import TestCase from unittest.mock import patch, MagicMock +import pytest + from marqo.core.inference.inference_models.open_clip_model import OPEN_CLIP from marqo.s2_inference.configs import ModelCache from marqo.s2_inference.errors import InvalidModelPropertiesError @@ -8,7 +10,7 @@ OPEN_CLIP_MODEL_PROPERTIES = _get_open_clip_properties() - +@pytest.mark.unittest class TestOpenCLIPModelLoad(TestCase): """A test suite for loading OpenCLIP models. @@ -25,7 +27,8 @@ def test_load_OpenCLIPModelFromCheckPointMethod_success(self): model_properties = { "name": "ViT-B-32", "url": "https://openclipart.org/download/12345/my_test_model.pt", - "type": "open_clip" + "type": "open_clip", + "dimensions": 512 } with patch("marqo.core.inference.inference_models.open_clip_model.OPEN_CLIP._load_model_and_image_preprocessor_from_checkpoint", \ @@ -44,7 +47,8 @@ def test_load_OpenCLIPModelFromCheckPointParameters_success(self): model_properties = { "name": "ViT-B-108", "url": "https://openclipart.org/download/12345/my_test_model.pt", - "type": "open_clip" + "type": "open_clip", + "dimensions": 512 } with patch("marqo.core.inference.inference_models.open_clip_model.open_clip.create_model", return_value=MagicMock()) \ as mock_create_model: @@ -75,6 +79,7 @@ def test_load_OpenCLIPModelFromCheckPointPreprocessConfig(self): """Test correct parameters are passed to the OpenCLIP loading from checkpoint method.""" model_tag = "my_test_model" model_properties = { + "dimensions": 512, "name": "test-siglip", "url": "https://openclipart.org/download/12345/my_test_model.pt", "type": "open_clip", @@ -111,6 +116,7 @@ def test_open_clip_load_fromHuggingFaceHub_success(self): model_properties = { "name": "hf-hub:my_test_hub", "type": "open_clip", + "dimensions": 512 } with patch("marqo.s2_inference.clip_utils.open_clip.create_model_and_transforms", return_value=(MagicMock(), MagicMock(), MagicMock())) \ @@ -132,6 +138,7 @@ def test_open_clip_load_fromMarqoModelRegistry_success(self): model_properties = { "name": "open_clip/ViT-B-32/laion5b_s13b_b90k", "type": "open_clip", + "dimensions": 512 } with patch("marqo.s2_inference.clip_utils.open_clip.create_model_and_transforms", return_value=(MagicMock(), MagicMock(), MagicMock())) \ @@ -153,7 +160,8 @@ def test_load_OpenCLIPModel_missing_model_properties(self): """Test loading an OpenCLIP model with missing model properties should raise an error.""" model_tag = "my_test_model" model_properties = { - "type": "open_clip" + "type": "open_clip", + "dimensions": 512 # Missing 'name' and 'url' } @@ -170,7 +178,8 @@ def test_load_OpenCLIPModel_unsupported_image_preprocessor(self): model_properties = { "name": "ViT-B-32", "type": "open_clip", - "image_preprocessor": "UnsupportedPreprocessor" + "image_preprocessor": "UnsupportedPreprocessor", + "dimensions": 512 } with self.assertRaises(InvalidModelPropertiesError) as context: @@ -185,7 +194,7 @@ def test_load_OpenCLIPModel_from_local_path(self): model_properties = { "name": "ViT-B-32", "localpath": "/path/to/my_test_model.pt", - "dimension": 512, + "dimensions": 512, "type": "open_clip" } with 
patch("marqo.core.inference.inference_models.open_clip_model.open_clip.create_model", return_value=MagicMock()) \ @@ -193,13 +202,16 @@ def test_load_OpenCLIPModel_from_local_path(self): with patch("marqo.core.inference.inference_models.open_clip_model.open_clip.get_tokenizer", return_value=MagicMock()) \ as mock_tokenizer: with patch.object(MagicMock(), 'eval', return_value=None) as mock_eval: - model = OPEN_CLIP(model_properties=model_properties, device="cpu") - model.load() - mock_create_model.assert_called_once_with( - model_name="ViT-B-32", - jit=False, - pretrained="/path/to/my_test_model.pt", - precision="fp32", device="cpu", - cache_dir=ModelCache.clip_cache_path - ) - mock_tokenizer.assert_called_once_with("ViT-B-32") \ No newline at end of file + with patch("marqo.core.inference.inference_models.open_clip_model.os.path.exists", + return_value=True) as mock_path_exists: + model = OPEN_CLIP(model_properties=model_properties, device="cpu") + model.load() + mock_create_model.assert_called_once_with( + model_name="ViT-B-32", + jit=False, + pretrained="/path/to/my_test_model.pt", + precision="fp32", device="cpu", + cache_dir=ModelCache.clip_cache_path + ) + mock_tokenizer.assert_called_once_with("ViT-B-32") + mock_path_exists.assert_called_once_with("/path/to/my_test_model.pt") \ No newline at end of file From 20b6c0b64bba8d55c3bf6e6de9ee677495f1d5d1 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Mon, 21 Oct 2024 19:19:20 +1100 Subject: [PATCH 56/63] Add dimensions for model properties tests --- .../core/inference/test_hugging_face_model.py | 10 +++++-- .../test_hugging_face_model_properties.py | 29 ++++++++++--------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/tests/core/inference/test_hugging_face_model.py b/tests/core/inference/test_hugging_face_model.py index 2535ced5a..aafe2c1f8 100644 --- a/tests/core/inference/test_hugging_face_model.py +++ b/tests/core/inference/test_hugging_face_model.py @@ -540,7 +540,8 @@ def test_initialize_huggingface_model(self): model_properties = { "name": "test-model", "type": "hf", - "poolingMethod": "mean" + "poolingMethod": "mean", + "dimensions": 768 } model = HuggingFaceModel(model_properties, "cpu", {}) self.assertIsInstance(model, HuggingFaceModel) @@ -548,6 +549,7 @@ def test_initialize_huggingface_model(self): self.assertEqual({}, model.model_auth) self.assertEqual("test-model", model.model_properties.name) self.assertEqual("hf", model.model_properties.type) + self.assertEqual(768, model.model_properties.dimensions) self.assertEqual(PoolingMethod.Mean, model.model_properties.pooling_method) def test_initialize_huggingface_model_with_invalid_properties(self): @@ -656,7 +658,8 @@ def test_tokenizer_loading_failure(self, mock_auto_model, mock_auto_tokenizer): model_properties = { "name": "test-model", "type": "hf", - "poolingMethod": "mean" + "poolingMethod": "mean", + "dimensions": 768 } with self.assertRaises(InvalidModelPropertiesError) as excinfo: @@ -672,7 +675,8 @@ def test_model_loading_failure(self, mock_auto_model, mock_auto_tokenizer): model_properties = { "name": "test-model", "type": "hf", - "poolingMethod": "mean" + "poolingMethod": "mean", + "dimensions": 768 } with self.assertRaises(InvalidModelPropertiesError) as excinfo: diff --git a/tests/core/inference/test_hugging_face_model_properties.py b/tests/core/inference/test_hugging_face_model_properties.py index 0d81ae897..9d27e94ee 100644 --- a/tests/core/inference/test_hugging_face_model_properties.py +++ b/tests/core/inference/test_hugging_face_model_properties.py 
@@ -11,7 +11,7 @@ class TestHuggingFaceModelProperties(unittest.TestCase): def test_valid_model_with_mandatory_fields(self): - model = HuggingFaceModelProperties(name="test-model", type="hf") + model = HuggingFaceModelProperties(name="test-model", type="hf", dimensions=768) self.assertEqual(model.name, "test-model") self.assertEqual(model.token, 128) self.assertEqual(model.type, "hf") @@ -19,17 +19,17 @@ def test_valid_model_with_mandatory_fields(self): def test_invalid_type(self): with self.assertRaises(ValidationError) as excinfo: - HuggingFaceModelProperties(name="test-model", type="invalid_type") + HuggingFaceModelProperties(name="test-model", type="invalid_type", dimensions=768) self.assertIn("The type of the model should be 'hf'", str(excinfo.exception)) def test_valid_model_with_url(self): - model = HuggingFaceModelProperties(name="test-model", type="hf", url="http://example.com") + model = HuggingFaceModelProperties(name="test-model", type="hf", url="http://example.com", dimensions=768) self.assertEqual(model.url, "http://example.com") self.assertIsNone(model.model_location) def test_valid_model_with_model_location(self): model_location = ModelLocation(hf=HfModelLocation(repo_id="test-repo-id", filename="test-filename")) - model = HuggingFaceModelProperties(name="test-model", type="hf", model_location=model_location) + model = HuggingFaceModelProperties(name="test-model", type="hf", model_location=model_location, dimensions=768) self.assertEqual(model.model_location, model_location) self.assertIsNone(model.url) @@ -38,36 +38,37 @@ def test_invalid_model_with_url_and_model_location(self): HuggingFaceModelProperties( name="test-model", type="hf", url="http://example.com", - model_location=ModelLocation(hf=HfModelLocation(repo_id="test-repo-id", filename="test-filename")) + model_location=ModelLocation(hf=HfModelLocation(repo_id="test-repo-id", filename="test-filename")), + dimensions=768 ) self.assertIn("Only one of 'url' and 'model_location' should be provided.", str(excinfo.exception)) - # Test for pooling method inference def test_infer_pooling_method(self): for pooling_method in (PoolingMethod.Mean, PoolingMethod.CLS): with self.subTest(f"Pooling method inferred from name with {pooling_method}"): with mock.patch("marqo.core.inference.inference_models.hugging_face_model_properties." "HuggingFaceModelProperties._infer_pooling_method_from_name", return_value = pooling_method) as mock_infer: - model = HuggingFaceModelProperties(name="model-with-cls", type="hf") + model = HuggingFaceModelProperties(name="model-with-cls", type="hf", dimensions=768) mock_infer.assert_called_once() self.assertEqual(pooling_method, model.pooling_method) def test_explicit_valid_pooling_method(self): with mock.patch("marqo.core.inference.inference_models.hugging_face_model_properties." 
"HuggingFaceModelProperties._infer_pooling_method_from_name") as mock_infer: - model = HuggingFaceModelProperties(name="test-model", type="hf", pooling_method=PoolingMethod.CLS) + model = HuggingFaceModelProperties(name="test-model", type="hf", pooling_method=PoolingMethod.CLS, + dimensions=768) self.assertEqual(model.pooling_method, PoolingMethod.CLS) mock_infer.assert_not_called() def test_explicit_invalid_pooling_method(self): with self.assertRaises(ValidationError) as excinfo: - _ = HuggingFaceModelProperties(name="test-model", type="hf", pooling_method="invalid") + _ = HuggingFaceModelProperties(name="test-model", type="hf", pooling_method="invalid", dimensions=768) self.assertIn("value is not a valid enumeration member; permitted: 'mean', 'cls'", str(excinfo.exception)) def test_model_without_optional_fields(self): - model = HuggingFaceModelProperties(name="test-model", type="hf") + model = HuggingFaceModelProperties(name="test-model", type="hf", dimensions=768) self.assertIsNone(model.url) self.assertIsNone(model.model_location) self.assertIsNone(model.model_auth) @@ -76,17 +77,17 @@ def test_model_without_optional_fields(self): def test_invalid_model_without_minimum_fields(self): with self.assertRaises(ValidationError) as excinfo: - HuggingFaceModelProperties(type="hf") + HuggingFaceModelProperties(type="hf", dimensions=768) self.assertIn("At least one of 'name', 'url', or 'model_location' should be provided.", str(excinfo.exception)) def test_invalid_model_with_both_url_and_model_location(self): model_location = ModelLocation(hf=HfModelLocation(repo_id="test-repo-id", filename="test-filename")) with self.assertRaises(ValidationError) as excinfo: - HuggingFaceModelProperties(url="http://example.com", model_location=model_location, type="hf") + HuggingFaceModelProperties(url="http://example.com", model_location=model_location, type="hf", dimensions=768) self.assertIn("Only one of 'url' and 'model_location' should be provided.", str(excinfo.exception)) def test_valid_model_with_custom_url_and_inferred_pooling(self): - model = HuggingFaceModelProperties(url="http://example.com", type="hf", pooling_method=None) + model = HuggingFaceModelProperties(url="http://example.com", type="hf", pooling_method=None, dimensions=768) self.assertEqual(model.pooling_method, PoolingMethod.Mean) def test_some_pooling_method_infer_on_real_model(self): @@ -103,5 +104,5 @@ def test_some_pooling_method_infer_on_real_model(self): for model_name, pooling_method in test_cases: with self.subTest(f"Pooling method inferred from name with {model_name}"): - model = HuggingFaceModelProperties(name=model_name, type="hf") + model = HuggingFaceModelProperties(name=model_name, type="hf", dimensions=768) self.assertEqual(pooling_method, model.pooling_method) \ No newline at end of file From 9198acce03b77bff355ba8abbc35ce4415b35d31 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Tue, 22 Oct 2024 12:39:48 +1100 Subject: [PATCH 57/63] Add some extra tests for HF loader --- .../inference_models/hugging_face_model.py | 2 +- .../hugging_face_model_properties.py | 4 +- .../core/inference/test_hugging_face_model.py | 44 +++++++++++++++++++ .../test_hugging_face_model_properties.py | 30 +++++++++++-- 4 files changed, 73 insertions(+), 7 deletions(-) diff --git a/src/marqo/core/inference/inference_models/hugging_face_model.py b/src/marqo/core/inference/inference_models/hugging_face_model.py index 1b693054a..8e24e6e6c 100644 --- a/src/marqo/core/inference/inference_models/hugging_face_model.py +++ 
b/src/marqo/core/inference/inference_models/hugging_face_model.py
@@ -157,7 +157,7 @@ def encode(self, sentence: Union[str, List[str]], normalize=True, **kwargs) -> Union[FloatTensor, np.ndarray]:
             sentence,
             padding=True,
             truncation=True,
-            max_length=self.model_properties.token,
+            max_length=self.model_properties.tokens,
             return_tensors="pt"
         ).to(self.device)
 
diff --git a/src/marqo/core/inference/inference_models/hugging_face_model_properties.py b/src/marqo/core/inference/inference_models/hugging_face_model_properties.py
index 226b9df03..b8341c053 100644
--- a/src/marqo/core/inference/inference_models/hugging_face_model_properties.py
+++ b/src/marqo/core/inference/inference_models/hugging_face_model_properties.py
@@ -28,7 +28,7 @@ class HuggingFaceModelProperties(MarqoBaseModel):
         name: The name of the model. This will be used as the repo_id in the Hugging Face model hub.
             This attribute is neglected if 'url' or 'model_location' is provided.
             We are not raising an error right now as that would be a breaking change.
-        token: The token length of the model. It is default to 128.
+        tokens: The token length of the model. It defaults to 128.
         type: The type of the model. It should be "hf".
         url: The URL of the model checkpoint. It is optional.
         dimensions: The dimensions of the model.
@@ -38,7 +38,7 @@ class HuggingFaceModelProperties(MarqoBaseModel):
         pooling_method: The pooling method for the model. It should be one of the values in the PoolingMethod enum.
     """
     name: Optional[str] = None
-    token: int = 128
+    tokens: int = 128
    type: str
    dimensions: int = Field(..., ge=1)
    url: Optional[str] = None
diff --git a/tests/core/inference/test_hugging_face_model.py b/tests/core/inference/test_hugging_face_model.py
index aafe2c1f8..028f7e34a 100644
--- a/tests/core/inference/test_hugging_face_model.py
+++ b/tests/core/inference/test_hugging_face_model.py
@@ -712,3 +712,47 @@ def test_sentence_transformers_nli_bert_base_cls_pooling_embeddings(self):
             f"Text embeddings for two different models are too close. "
             f"There is a problem with the test data or bug in the code."
) + + @mock.patch("transformers.AutoModel.from_pretrained", side_effect=mock.MagicMock()) + @mock.patch("transformers.AutoTokenizer.from_pretrained", return_value=mock.MagicMock()) + def test_instantiate_a_hugging_face_model_with_minimum_model_properties(self, mock_auto_model, mock_auto_tokenizer): + """Test that a model can be instantiated with the minimum required model properties.""" + model_properties = { + "name": "test-model", + "type": "hf", + "dimensions": 512, + } + + model = HuggingFaceModel(model_properties, "cpu", None) + self.assertIsInstance(model, HuggingFaceModel) + self.assertEqual("cpu", model.device) + self.assertEqual(None, model.model_auth) + self.assertEqual("test-model", model.model_properties.name) + self.assertEqual("hf", model.model_properties.type) + self.assertEqual(512, model.model_properties.dimensions) + self.assertEqual(128, model.model_properties.tokens) + self.assertEqual(PoolingMethod.Mean, model.model_properties.pooling_method) + + @mock.patch("transformers.AutoModel.from_pretrained", side_effect=mock.MagicMock()) + @mock.patch("transformers.AutoTokenizer.from_pretrained", return_value=mock.MagicMock()) + def test_instantiate_a_hugging_face_model_with_custom_model_properties(self, mock_auto_model, mock_auto_tokenizer): + """Test that a model can be instantiated with custom model properties.""" + model_properties = { + "name": "test-model", + "type": "hf", + "dimensions": 123, + "tokens": 456, + "poolingMethod": "cls", + } + + model = HuggingFaceModel(model_properties, "cpu", None) + self.assertIsInstance(model, HuggingFaceModel) + self.assertEqual("cpu", model.device) + self.assertEqual(None, model.model_auth) + self.assertEqual("test-model", model.model_properties.name) + self.assertEqual("hf", model.model_properties.type) + self.assertEqual(123, model.model_properties.dimensions) + self.assertEqual(456, model.model_properties.tokens) + self.assertEqual(PoolingMethod.CLS, model.model_properties.pooling_method) + + diff --git a/tests/core/inference/test_hugging_face_model_properties.py b/tests/core/inference/test_hugging_face_model_properties.py index 9d27e94ee..b63ceee4e 100644 --- a/tests/core/inference/test_hugging_face_model_properties.py +++ b/tests/core/inference/test_hugging_face_model_properties.py @@ -11,11 +11,33 @@ class TestHuggingFaceModelProperties(unittest.TestCase): def test_valid_model_with_mandatory_fields(self): + """A test for creating a valid HuggingFaceModelProperties object with only mandatory fields.""" model = HuggingFaceModelProperties(name="test-model", type="hf", dimensions=768) - self.assertEqual(model.name, "test-model") - self.assertEqual(model.token, 128) - self.assertEqual(model.type, "hf") - self.assertEqual(model.pooling_method, PoolingMethod.Mean) + self.assertEqual("test-model", model.name) + self.assertEqual(128, model.tokens) + self.assertEqual("hf", model.type) + self.assertEqual(PoolingMethod.Mean, model.pooling_method) + self.assertEqual(768, model.dimensions) + self.assertIsNone(model.url) + self.assertIsNone(model.model_location) + self.assertIsNone(model.model_auth) + self.assertIsNone(model.note) + + def test_valid_model_with_custom_fields(self): + """A test for creating a valid HuggingFaceModelProperties object with all fields.""" + model = HuggingFaceModelProperties( + name="test-model", type="hf", dimensions=768, + tokens=256, pooling_method=PoolingMethod.CLS + ) + self.assertEqual("test-model", model.name) + self.assertEqual(256, model.tokens) + self.assertEqual("hf", model.type) + 
self.assertEqual(PoolingMethod.CLS, model.pooling_method) + self.assertEqual(model.dimensions, 768) + self.assertIsNone(model.url) + self.assertIsNone(model.model_location) + self.assertIsNone(model.model_auth) + self.assertIsNone(model.note) def test_invalid_type(self): with self.assertRaises(ValidationError) as excinfo: From 4b4a804274a748d88d3be42d1f54aa77b140ddd7 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Tue, 22 Oct 2024 12:49:39 +1100 Subject: [PATCH 58/63] Add secrets to largemodel unittests --- .github/workflows/largemodel_unit_test_CI.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/largemodel_unit_test_CI.yml b/.github/workflows/largemodel_unit_test_CI.yml index ff8e63c22..f2a9abbfc 100644 --- a/.github/workflows/largemodel_unit_test_CI.yml +++ b/.github/workflows/largemodel_unit_test_CI.yml @@ -150,6 +150,10 @@ jobs: export MARQO_MAX_CPU_MODEL_MEMORY=15 export MARQO_MAX_CUDA_MODEL_MEMORY=15 + export PRIVATE_MODEL_TESTS_AWS_ACCESS_KEY_ID=${{ secrets.PRIVATE_MODEL_TESTS_AWS_ACCESS_KEY_ID }} + export PRIVATE_MODEL_TESTS_AWS_SECRET_ACCESS_KEY=${{ secrets.PRIVATE_MODEL_TESTS_AWS_SECRET_ACCESS_KEY }} + export PRIVATE_MODEL_TESTS_HF_TOKEN=${{ secrets.PRIVATE_MODEL_TESTS_HF_TOKEN }} + export PYTHONPATH="./marqo/tests:./marqo/src:./marqo" pytest marqo/tests --largemodel --ignore=marqo/tests/test_documentation.py From 3f3cb4d94caf1a116dc4d822ecb1292538dc0ddb Mon Sep 17 00:00:00 2001 From: Li Wan Date: Tue, 22 Oct 2024 15:10:40 +1100 Subject: [PATCH 59/63] Add model properties --- .../hugging_face_model_properties.py | 2 - .../open_clip_model_properties.py | 5 +- .../test_hugging_face_model_properties.py | 56 ++++++++++++++-- .../test_open_clip_model_properties.py | 66 +++++++++++++++++++ 4 files changed, 116 insertions(+), 13 deletions(-) create mode 100644 tests/core/inference/test_open_clip_model_properties.py diff --git a/src/marqo/core/inference/inference_models/hugging_face_model_properties.py b/src/marqo/core/inference/inference_models/hugging_face_model_properties.py index b8341c053..239cec98e 100644 --- a/src/marqo/core/inference/inference_models/hugging_face_model_properties.py +++ b/src/marqo/core/inference/inference_models/hugging_face_model_properties.py @@ -33,7 +33,6 @@ class HuggingFaceModelProperties(MarqoBaseModel): url: The URL of the model checkpoint. It is optional. dimensions: The dimensions of the model. model_location: The location of the model. It is optional. - model_auth: The authentication information for the model. It is optional. note: A note about the model. It is optional. pooling_method: The pooling method for the model. It should be one of the values in the PoolingMethod enum. 
""" @@ -43,7 +42,6 @@ class HuggingFaceModelProperties(MarqoBaseModel): dimensions: int = Field(..., ge=1) url: Optional[str] = None model_location: Optional[ModelLocation] = Field(default=None, alias="modelLocation") - model_auth: Optional[ModelAuth] = Field(default=None, alias="modelAuth") note: Optional[str] = None pooling_method: PoolingMethod = Field(..., alias="poolingMethod") diff --git a/src/marqo/core/inference/inference_models/open_clip_model_properties.py b/src/marqo/core/inference/inference_models/open_clip_model_properties.py index a88ddb1b4..be2b45ba4 100644 --- a/src/marqo/core/inference/inference_models/open_clip_model_properties.py +++ b/src/marqo/core/inference/inference_models/open_clip_model_properties.py @@ -5,7 +5,7 @@ from pydantic import Field, root_validator from marqo.base_model import MarqoBaseModel -from marqo.tensor_search.models.private_models import ModelLocation, ModelAuth +from marqo.tensor_search.models.private_models import ModelLocation class ImagePreprocessor(str, Enum): @@ -36,7 +36,6 @@ class OpenCLIPModelProperties(MarqoBaseModel): localpath: The local path of the model checkpoint. It is optional. model_location: The location of the model. It is optional. tokenizer: The name of the _tokenizer. It is optional. - model_auth: The authentication information for the model. It is optional. image_preprocessor: The image preprocessor used by the model. It should be one of the values in the ImagePreprocessor enum. mean: The mean values for the image preprocessor. It is optional. It provided, it will override the @@ -46,7 +45,6 @@ class OpenCLIPModelProperties(MarqoBaseModel): size: The size of the image. It is optional. If provided, it will override the default size of the image. note: A note about the model. It is optional. pretrained: The name of the pretrained model. It is optional. 
- """ name: str type: str @@ -57,7 +55,6 @@ class OpenCLIPModelProperties(MarqoBaseModel): localpath: Optional[str] = None model_location: Optional[ModelLocation] = Field(default=None, alias="modelLocation") tokenizer: Optional[str] = None - model_auth: Optional[ModelAuth] = Field(default=None, alias="modelAuth") image_preprocessor: ImagePreprocessor = Field(default=ImagePreprocessor.OpenCLIP, alias="imagePreprocessor") mean: Optional[List[float]] = None std: Optional[List[float]] = None diff --git a/tests/core/inference/test_hugging_face_model_properties.py b/tests/core/inference/test_hugging_face_model_properties.py index b63ceee4e..16924b7a4 100644 --- a/tests/core/inference/test_hugging_face_model_properties.py +++ b/tests/core/inference/test_hugging_face_model_properties.py @@ -1,13 +1,16 @@ import unittest from unittest import mock +import pytest from pydantic import ValidationError -from marqo.core.inference.inference_models.hugging_face_model_properties import HuggingFaceModelProperties, PoolingMethod +from marqo.core.inference.inference_models.hugging_face_model_properties import HuggingFaceModelProperties, \ + PoolingMethod from marqo.tensor_search.models.external_apis.hf import HfModelLocation from marqo.tensor_search.models.private_models import ModelLocation +@pytest.mark.unittest class TestHuggingFaceModelProperties(unittest.TestCase): def test_valid_model_with_mandatory_fields(self): @@ -20,7 +23,6 @@ def test_valid_model_with_mandatory_fields(self): self.assertEqual(768, model.dimensions) self.assertIsNone(model.url) self.assertIsNone(model.model_location) - self.assertIsNone(model.model_auth) self.assertIsNone(model.note) def test_valid_model_with_custom_fields(self): @@ -36,9 +38,49 @@ def test_valid_model_with_custom_fields(self): self.assertEqual(model.dimensions, 768) self.assertIsNone(model.url) self.assertIsNone(model.model_location) - self.assertIsNone(model.model_auth) self.assertIsNone(model.note) + def test_both_original_and_alias_fields_work(self): + test_cases = (({ + "name": "test-model", + "type": "hf", + "dimensions": 768, + "tokens": 256, + "poolingMethod": "cls", + "modelLocation": { + "hf": { + "repoId": "test-repo-id", + "filename": "test-filename" + }, + }, + }, "alias fields/camelCase"), + ({ + "name": "test-model", + "type": "hf", + "dimensions": 768, + "tokens": 256, + "pooling_method": "cls", + "model_location": { + "hf": { + "repo_id": "test-repo-id", + "filename": "test-filename" + }, + } + }, "standard fields/snake_case")) + + for model_properties, msg in test_cases: + with self.subTest(msg): + model = HuggingFaceModelProperties(**model_properties) + self.assertEqual("test-model", model.name) + self.assertEqual(256, model.tokens) + self.assertEqual("hf", model.type) + self.assertEqual(PoolingMethod.CLS, model.pooling_method) + self.assertEqual(model.dimensions, 768) + self.assertIsNone(model.url) + self.assertEqual(model.model_location, + ModelLocation(hf=HfModelLocation(repo_id="test-repo-id", filename="test-filename"))) + self.assertIsNone(model.note) + def test_invalid_type(self): with self.assertRaises(ValidationError) as excinfo: HuggingFaceModelProperties(name="test-model", type="invalid_type", dimensions=768) @@ -70,7 +112,7 @@ def test_infer_pooling_method(self): with self.subTest(f"Pooling method inferred from name with {pooling_method}"): with mock.patch("marqo.core.inference.inference_models.hugging_face_model_properties." 
"HuggingFaceModelProperties._infer_pooling_method_from_name", - return_value = pooling_method) as mock_infer: + return_value=pooling_method) as mock_infer: model = HuggingFaceModelProperties(name="model-with-cls", type="hf", dimensions=768) mock_infer.assert_called_once() self.assertEqual(pooling_method, model.pooling_method) @@ -93,7 +135,6 @@ def test_model_without_optional_fields(self): model = HuggingFaceModelProperties(name="test-model", type="hf", dimensions=768) self.assertIsNone(model.url) self.assertIsNone(model.model_location) - self.assertIsNone(model.model_auth) self.assertIsNone(model.note) self.assertEqual(model.pooling_method, PoolingMethod.Mean) @@ -105,7 +146,8 @@ def test_invalid_model_without_minimum_fields(self): def test_invalid_model_with_both_url_and_model_location(self): model_location = ModelLocation(hf=HfModelLocation(repo_id="test-repo-id", filename="test-filename")) with self.assertRaises(ValidationError) as excinfo: - HuggingFaceModelProperties(url="http://example.com", model_location=model_location, type="hf", dimensions=768) + HuggingFaceModelProperties(url="http://example.com", model_location=model_location, type="hf", + dimensions=768) self.assertIn("Only one of 'url' and 'model_location' should be provided.", str(excinfo.exception)) def test_valid_model_with_custom_url_and_inferred_pooling(self): @@ -127,4 +169,4 @@ def test_some_pooling_method_infer_on_real_model(self): for model_name, pooling_method in test_cases: with self.subTest(f"Pooling method inferred from name with {model_name}"): model = HuggingFaceModelProperties(name=model_name, type="hf", dimensions=768) - self.assertEqual(pooling_method, model.pooling_method) \ No newline at end of file + self.assertEqual(pooling_method, model.pooling_method) diff --git a/tests/core/inference/test_open_clip_model_properties.py b/tests/core/inference/test_open_clip_model_properties.py new file mode 100644 index 000000000..9c43340dc --- /dev/null +++ b/tests/core/inference/test_open_clip_model_properties.py @@ -0,0 +1,66 @@ +import unittest + +import pytest + +from marqo.core.inference.inference_models.open_clip_model_properties import OpenCLIPModelProperties + + +@pytest.mark.unittest +class TestOpenCLIPModelProperties(unittest.TestCase): + def test_both_original_and_alias_fields_work(self): + """A test for creating a valid OpenCLIPModelProperties object with all fields with + snake values and camel values.""" + test_cases = ( + ( + { + "name": "test-model", + "type": "open_clip", + "dimensions": 768, + "tokens": 256, + "modelLocation": { + "hf": { + "repoId": "test-repo-id", + "filename": "test-filename" + }, + }, + "imagePreprocessor": "SigLIP", + }, + + "alias fields/camelCase"), + ( + { + "name": "test-model", + "type": "open_clip", + "dimensions": 768, + "tokens": 256, + "model_location": { + "hf": { + "repo_id": "test-repo-id", + "filename": "test-filename" + }, + }, + "image_preprocessor": "SigLIP", + + }, + "original fields/snake_case"), + ) + + for model_properties, msg in test_cases: + with self.subTest(msg): + open_clip_model_properties = OpenCLIPModelProperties(**model_properties) + self.assertEqual(open_clip_model_properties.name, "test-model") + self.assertEqual(open_clip_model_properties.type, "open_clip") + self.assertEqual(open_clip_model_properties.dimensions, 768) + self.assertEqual(open_clip_model_properties.jit, False) + self.assertEqual(open_clip_model_properties.precision, "fp32") + self.assertIsNone(open_clip_model_properties.url) + 
self.assertIsNone(open_clip_model_properties.localpath) + self.assertEqual(open_clip_model_properties.model_location.hf.repo_id, "test-repo-id") + self.assertEqual(open_clip_model_properties.model_location.hf.filename, "test-filename") + self.assertEqual(open_clip_model_properties.tokenizer, None) + self.assertEqual(open_clip_model_properties.image_preprocessor, "SigLIP") + self.assertEqual(open_clip_model_properties.mean, None) + self.assertEqual(open_clip_model_properties.std, None) + self.assertEqual(open_clip_model_properties.size, None) + self.assertEqual(open_clip_model_properties.note, None) + self.assertEqual(open_clip_model_properties.pretrained, None) \ No newline at end of file From 0ed66067157889aad9e6f9cc822d002c5b663b3c Mon Sep 17 00:00:00 2001 From: Li Wan Date: Tue, 22 Oct 2024 17:11:20 +1100 Subject: [PATCH 60/63] Fix Yihan's comments --- src/marqo/marqo_docs.py | 3 ++- .../s2_inference/languagebind/video/modeling_video.py | 8 ++++---- src/marqo/s2_inference/multimodal_model_load.py | 2 +- src/marqo/tensor_search/models/private_models.py | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/marqo/marqo_docs.py b/src/marqo/marqo_docs.py index abf31d8b2..a05c6f90a 100644 --- a/src/marqo/marqo_docs.py +++ b/src/marqo/marqo_docs.py @@ -50,7 +50,7 @@ def configuring_preloaded_models(): def bring_your_own_model(): - return _build_url('Guides/Models-Reference/bring_your_own_model/') + return _build_url('models/marqo/bring-your-own-model') def query_reference(): @@ -68,6 +68,7 @@ def api_reference_document_body(): def troubleshooting(): return _build_url('other-resources/troubleshooting/troubleshooting/') + def generic_models(): return _build_url('models/marqo/list-of-models/#generic-clip-models') diff --git a/src/marqo/s2_inference/languagebind/video/modeling_video.py b/src/marqo/s2_inference/languagebind/video/modeling_video.py index 51f162fd3..7042a6dd8 100644 --- a/src/marqo/s2_inference/languagebind/video/modeling_video.py +++ b/src/marqo/s2_inference/languagebind/video/modeling_video.py @@ -675,9 +675,9 @@ def forward( >>> from transformers import AutoTokenizer, CLIPTextModel >>> model = CLIPTextModel.from_pretrained("openai/clip-vit-base-patch32") - >>> _tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32") + >>> tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32") - >>> inputs = _tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="pt") + >>> inputs = tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="pt") >>> outputs = model(**inputs) >>> last_hidden_state = outputs.last_hidden_state @@ -945,9 +945,9 @@ def get_text_features( >>> from transformers import AutoTokenizer, CLIPModel >>> model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32") - >>> _tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32") + >>> tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32") - >>> inputs = _tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="pt") + >>> inputs = tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="pt") >>> text_features = model.get_text_features(**inputs) ```""" # Use CLIP model's config for some fields (if specified) instead of those of vision & text components. 
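The next two hunks fix more leftovers from an over-eager find-and-replace (`_model` -> `model` in an error message) and a silent `aliase` -> `alias` keyword typo in `ModelLocation`. The `aliase` bug is easy to miss because pydantic v1's `Field()` accepts unknown keyword arguments instead of rejecting them, so the camelCase alias never takes effect. A minimal sketch of the failure mode (assuming pydantic v1 semantics, which Marqo's base models build on; the class names here are illustrative, not Marqo's):

```python
# Sketch only: pydantic v1 Field() stores unrecognised kwargs such as `aliase`
# in FieldInfo.extra rather than raising, so no alias is registered at all.
from pydantic import BaseModel, Field


class LocationWithTypo(BaseModel):
    auth_required: bool = Field(default=False, aliase="authRequired")  # typo: alias never registered


class LocationFixed(BaseModel):
    auth_required: bool = Field(default=False, alias="authRequired")


print(LocationWithTypo.parse_obj({"authRequired": True}).auth_required)  # False: key silently ignored
print(LocationFixed.parse_obj({"authRequired": True}).auth_required)     # True
```

Because the unknown `authRequired` key is dropped under pydantic's default `Extra.ignore` behaviour, the broken version keeps its `False` default with no warning, which is why the one-character fix below matters.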
diff --git a/src/marqo/s2_inference/multimodal_model_load.py b/src/marqo/s2_inference/multimodal_model_load.py index 618faa652..173630c22 100644 --- a/src/marqo/s2_inference/multimodal_model_load.py +++ b/src/marqo/s2_inference/multimodal_model_load.py @@ -99,7 +99,7 @@ def _load_languagebind_model(self): 'video': 'LanguageBind_Video_V1.5_FT', } else: - raise ValueError(f"Unsupported LanguageBind _model: {self.model_name}") + raise ValueError(f"Unsupported LanguageBind model: {self.model_name}") model = LanguageBind(clip_type=self.clip_type, cache_dir=ModelCache.languagebind_cache_path).to(self.device) model.eval() return model diff --git a/src/marqo/tensor_search/models/private_models.py b/src/marqo/tensor_search/models/private_models.py index ea3f197ef..b23fa7294 100644 --- a/src/marqo/tensor_search/models/private_models.py +++ b/src/marqo/tensor_search/models/private_models.py @@ -39,7 +39,7 @@ class ModelLocation(MarqoBaseModel): s3: Optional[S3Location] = None hf: Optional[HfModelLocation] = None - auth_required: bool = Field(default=False, aliase="authRequired") + auth_required: bool = Field(default=False, alias="authRequired") @root_validator(skip_on_failure=True) From ea9aebc38562f0fdb376dc2e5ccbe7c64581cead Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 23 Oct 2024 14:14:43 +1100 Subject: [PATCH 61/63] Fix Yihan and Joshua's comments --- .../inference_models/hugging_face_model.py | 15 +++-------- .../hugging_face_model_properties.py | 25 +++++++++---------- .../marqo_base_model_properties.py | 17 +++++++++++++ .../open_clip_model_properties.py | 6 ++--- .../core/inference/test_hugging_face_model.py | 7 ++---- 5 files changed, 36 insertions(+), 34 deletions(-) create mode 100644 src/marqo/core/inference/inference_models/marqo_base_model_properties.py diff --git a/src/marqo/core/inference/inference_models/hugging_face_model.py b/src/marqo/core/inference/inference_models/hugging_face_model.py index 8e24e6e6c..a5ab03844 100644 --- a/src/marqo/core/inference/inference_models/hugging_face_model.py +++ b/src/marqo/core/inference/inference_models/hugging_face_model.py @@ -54,12 +54,6 @@ def _load_necessary_components(self): Raises: InvalidModelPropertiesError: If the model properties are invalid or incomplete. """ - if not (self.model_properties.name or self.model_properties.url or self.model_properties.model_location): - raise InvalidModelPropertiesError( - f"Invalid model properties for the 'hf' model. " - f"You do not have the necessary information to load the model. " - f"Check {marqo_docs.bring_your_own_model()} for more information." - ) if self.model_properties.name: self._model, self._tokenizer = self._load_from_hugging_face_repo() @@ -72,11 +66,8 @@ def _load_necessary_components(self): else: self._model, self._tokenizer = self._load_from_private_hugging_face_repo() else: - raise InvalidModelPropertiesError( - f"Invalid model properties for the 'hf' model. " - f"You do not have the necessary information to load the model. " - f"Check {marqo_docs.bring_your_own_model()} for more information." - ) + raise InternalError(f"Invalid model properties: {self.model_properties}. Marqo can not load the model via " + f"a specified method. Please check the model properties and try again.") self._model = self._model.to(self.device) self._pooling_func = self._load_pooling_method() @@ -193,7 +184,7 @@ def _cls_pool_func(model_output, attention_mask=None): @staticmethod def extract_huggingface_archive(path: str) -> str: ''' - This function takes the path as input. 
The path can must be a string that can be:
+        This function takes the path as input. The path is a string that can be one of the following:
         1. A downloaded archive file. This function will extract the model from the archive and return the directory path.
         2. A repo_id in huggingface. This function will return the input string directly.
 
diff --git a/src/marqo/core/inference/inference_models/hugging_face_model_properties.py b/src/marqo/core/inference/inference_models/hugging_face_model_properties.py
index 239cec98e..51588a1da 100644
--- a/src/marqo/core/inference/inference_models/hugging_face_model_properties.py
+++ b/src/marqo/core/inference/inference_models/hugging_face_model_properties.py
@@ -7,10 +7,10 @@
 from huggingface_hub.utils import HfHubHTTPError
 from pydantic import Field, validator, root_validator
 
-from marqo.base_model import MarqoBaseModel
+from marqo.core.inference.inference_models.marqo_base_model_properties import MarqoBaseModelProperties
 from marqo.s2_inference.configs import ModelCache
 from marqo.s2_inference.logger import get_logger
-from marqo.tensor_search.models.private_models import ModelLocation, ModelAuth
+from marqo.tensor_search.models.private_models import ModelLocation
 
 logger = get_logger(__name__)
 
@@ -20,7 +20,7 @@ class PoolingMethod(str, Enum):
     CLS = "cls"
 
 
-class HuggingFaceModelProperties(MarqoBaseModel):
+class HuggingFaceModelProperties(MarqoBaseModelProperties):
     """
     A class to represent the properties of a Hugging Face model.
 
@@ -38,8 +38,6 @@ class HuggingFaceModelProperties(MarqoBaseModel):
     """
     name: Optional[str] = None
     tokens: int = 128
-    type: str
-    dimensions: int = Field(..., ge=1)
     url: Optional[str] = None
     model_location: Optional[ModelLocation] = Field(default=None, alias="modelLocation")
     note: Optional[str] = None
     pooling_method: PoolingMethod = Field(..., alias="poolingMethod")
@@ -80,30 +78,31 @@ def _infer_pooling_method_from_name(name: str) -> PoolingMethod:
         """
         repo_id = name
         file_name = "1_Pooling/config.json"
+
+        def log_warning_and_return_default():
+            logger.warning(f"Could not infer pooling method from the model {name}. Defaulting to mean pooling.")
+            return PoolingMethod.Mean
+
         try:
             file_path = hf_hub_download(repo_id, file_name, cache_dir=ModelCache.hf_cache_path)
         except HfHubHTTPError:
-            logger.warn(f"Could not infer pooling method from the model {name}. Defaulting to mean pooling.")
-            return PoolingMethod.Mean
+            return log_warning_and_return_default()
 
         try:
             with open(file_path, 'r') as file:
                 content = json.loads(file.read())
         except JSONDecodeError:
-            logger.warn(f"Could not infer pooling method from the model {name}. Defaulting to mean pooling.")
-            return PoolingMethod.Mean
+            return log_warning_and_return_default()
 
         if not isinstance(content, dict):
-            logger.warn(f"Could not infer pooling method from the model {name}. Defaulting to mean pooling.")
-            return PoolingMethod.Mean
+            return log_warning_and_return_default()
 
         if content.get("pooling_mode_cls_token") is True:
             return PoolingMethod.CLS
         elif content.get("pooling_mode_mean_tokens") is True:
             return PoolingMethod.Mean
         else:
-            logger.warn(f"Could not infer pooling method from the model {name}.
Defaulting to mean pooling.") - return PoolingMethod.Mean + return log_warning_and_return_default() @root_validator(skip_on_failure=True) def _validate_minimum_required_fields_to_load(cls, values): diff --git a/src/marqo/core/inference/inference_models/marqo_base_model_properties.py b/src/marqo/core/inference/inference_models/marqo_base_model_properties.py new file mode 100644 index 000000000..bfff9e178 --- /dev/null +++ b/src/marqo/core/inference/inference_models/marqo_base_model_properties.py @@ -0,0 +1,17 @@ +from abc import ABC + +from pydantic import Field + +from marqo.base_model import MarqoBaseModel + + +class MarqoBaseModelProperties(MarqoBaseModel, ABC): + """ + The base class for all model properties classes in Marqo. + + Attributes: + dimensions: The dimensions of the model. + type: The type of the model + """ + dimensions: int = Field(..., ge=1) + type: str diff --git a/src/marqo/core/inference/inference_models/open_clip_model_properties.py b/src/marqo/core/inference/inference_models/open_clip_model_properties.py index be2b45ba4..9eb0822d0 100644 --- a/src/marqo/core/inference/inference_models/open_clip_model_properties.py +++ b/src/marqo/core/inference/inference_models/open_clip_model_properties.py @@ -4,7 +4,7 @@ from pydantic import Field, root_validator -from marqo.base_model import MarqoBaseModel +from marqo.core.inference.inference_models.marqo_base_model_properties import MarqoBaseModelProperties from marqo.tensor_search.models.private_models import ModelLocation @@ -21,7 +21,7 @@ class Precision(str, Enum): FP16 = "fp16" -class OpenCLIPModelProperties(MarqoBaseModel): +class OpenCLIPModelProperties(MarqoBaseModelProperties): """ A class to represent the properties of an OpenCLIP model. @@ -47,8 +47,6 @@ class OpenCLIPModelProperties(MarqoBaseModel): pretrained: The name of the pretrained model. It is optional. 
""" name: str - type: str - dimensions: int = Field(..., ge=1) jit: bool = False precision: Precision = Precision.FP32 url: Optional[str] = None diff --git a/tests/core/inference/test_hugging_face_model.py b/tests/core/inference/test_hugging_face_model.py index 028f7e34a..a62e25f8c 100644 --- a/tests/core/inference/test_hugging_face_model.py +++ b/tests/core/inference/test_hugging_face_model.py @@ -1,14 +1,11 @@ import unittest from unittest import mock -from pydantic import ValidationError +import numpy as np -from marqo.core.inference.inference_models.hugging_face_model_properties import HuggingFaceModelProperties, PoolingMethod -from marqo.tensor_search.models.external_apis.hf import HfModelLocation -from marqo.tensor_search.models.private_models import ModelLocation from marqo.core.inference.inference_models.hugging_face_model import HuggingFaceModel +from marqo.core.inference.inference_models.hugging_face_model_properties import PoolingMethod from marqo.s2_inference.errors import InvalidModelPropertiesError -import numpy as np class TestHuggingFaceModel(unittest.TestCase): From 2d2a62f9b9c463d91316742e9d8ad442a6540af6 Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 23 Oct 2024 14:23:44 +1100 Subject: [PATCH 62/63] Respond to comments --- tests/core/inference/test_open_clip_model_load.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/core/inference/test_open_clip_model_load.py b/tests/core/inference/test_open_clip_model_load.py index c0bb3e50e..29af32b4f 100644 --- a/tests/core/inference/test_open_clip_model_load.py +++ b/tests/core/inference/test_open_clip_model_load.py @@ -238,7 +238,7 @@ def test_load_OpenCLIPModel_with_auth_s3(self): aws_secret_access_key="my_secret_key", )) - with patch("marqo.s2_inference.clip_utils.download_model") as mock_download_model: + with patch("marqo.core.inference.inference_models.open_clip_model.download_model") as mock_download_model: # It's ok to return a RuntimeError as we are testing the download_model function with self.assertRaises(RuntimeError): model = OPEN_CLIP(model_properties=model_properties, device="cpu", model_auth=model_auth) @@ -267,14 +267,13 @@ def test_load_OpenCLIPModel_with_auth_hf(self): model_auth = ModelAuth(**{"hf": {"token":"my_hf_token"}}) - with patch("marqo.s2_inference.clip_utils.download_model") as mock_download_model: + with patch("marqo.core.inference.inference_models.open_clip_model.download_model") as mock_download_model: # It's ok to return a RuntimeError as we are testing the download_model function - with self.assertRaises(RuntimeError): + with self.assertRaises(RuntimeError) as e: model = OPEN_CLIP(model_properties=model_properties, device="cpu", model_auth=model_auth) model.load() mock_download_model.assert_called_once_with( repo_location=ModelLocation(**model_properties["model_location"]), auth=model_auth, - ) - + ) \ No newline at end of file From 50d0a6f8eb3ada9edd7a53336e535ab9ab71713f Mon Sep 17 00:00:00 2001 From: Li Wan Date: Wed, 23 Oct 2024 14:48:49 +1100 Subject: [PATCH 63/63] Rename folder to embeddings models --- .../__init__.py | 0 .../abstract_clip_model.py | 4 +-- .../abstract_embedding_model.py | 0 .../hf_tokenizer.py | 0 .../hugging_face_model.py | 4 +-- .../hugging_face_model_properties.py | 2 +- .../image_download.py | 0 .../marqo_base_model_properties.py | 0 .../open_clip_model.py | 6 ++-- .../open_clip_model_properties.py | 2 +- src/marqo/s2_inference/clip_utils.py | 9 ++---- src/marqo/s2_inference/model_registry.py | 4 +-- 
src/marqo/s2_inference/s2_inference.py | 2 +- .../test_corrupt_file_error_handling.py | 18 ++++++------ .../core/inference/test_hugging_face_model.py | 4 +-- .../test_hugging_face_model_properties.py | 6 ++-- .../core/inference/test_marqo_fashion_clip.py | 2 +- .../inference/test_open_clip_model_load.py | 28 +++++++++---------- .../test_open_clip_model_properties.py | 2 +- tests/s2_inference/test_clip_utils.py | 2 +- tests/tensor_search/test_model_auth.py | 18 ++++++------ 21 files changed, 54 insertions(+), 59 deletions(-) rename src/marqo/core/inference/{inference_models => embedding_models}/__init__.py (100%) rename src/marqo/core/inference/{inference_models => embedding_models}/abstract_clip_model.py (97%) rename src/marqo/core/inference/{inference_models => embedding_models}/abstract_embedding_model.py (100%) rename src/marqo/core/inference/{inference_models => embedding_models}/hf_tokenizer.py (100%) rename src/marqo/core/inference/{inference_models => embedding_models}/hugging_face_model.py (98%) rename src/marqo/core/inference/{inference_models => embedding_models}/hugging_face_model_properties.py (98%) rename src/marqo/core/inference/{inference_models => embedding_models}/image_download.py (100%) rename src/marqo/core/inference/{inference_models => embedding_models}/marqo_base_model_properties.py (100%) rename src/marqo/core/inference/{inference_models => embedding_models}/open_clip_model.py (98%) rename src/marqo/core/inference/{inference_models => embedding_models}/open_clip_model_properties.py (98%) diff --git a/src/marqo/core/inference/inference_models/__init__.py b/src/marqo/core/inference/embedding_models/__init__.py similarity index 100% rename from src/marqo/core/inference/inference_models/__init__.py rename to src/marqo/core/inference/embedding_models/__init__.py diff --git a/src/marqo/core/inference/inference_models/abstract_clip_model.py b/src/marqo/core/inference/embedding_models/abstract_clip_model.py similarity index 97% rename from src/marqo/core/inference/inference_models/abstract_clip_model.py rename to src/marqo/core/inference/embedding_models/abstract_clip_model.py index 491c0f9d2..1b2a33b23 100644 --- a/src/marqo/core/inference/inference_models/abstract_clip_model.py +++ b/src/marqo/core/inference/embedding_models/abstract_clip_model.py @@ -6,8 +6,8 @@ from marqo.core.inference.image_download import (_is_image, format_and_load_CLIP_images, format_and_load_CLIP_image) -from marqo.core.inference.inference_models.abstract_embedding_model import AbstractEmbeddingModel -from marqo.core.inference.inference_models.image_download import (_is_image, format_and_load_CLIP_images, +from marqo.core.inference.embedding_models.abstract_embedding_model import AbstractEmbeddingModel +from marqo.core.inference.embedding_models.image_download import (_is_image, format_and_load_CLIP_images, format_and_load_CLIP_image) from marqo.s2_inference.logger import get_logger from marqo.s2_inference.types import * diff --git a/src/marqo/core/inference/inference_models/abstract_embedding_model.py b/src/marqo/core/inference/embedding_models/abstract_embedding_model.py similarity index 100% rename from src/marqo/core/inference/inference_models/abstract_embedding_model.py rename to src/marqo/core/inference/embedding_models/abstract_embedding_model.py diff --git a/src/marqo/core/inference/inference_models/hf_tokenizer.py b/src/marqo/core/inference/embedding_models/hf_tokenizer.py similarity index 100% rename from src/marqo/core/inference/inference_models/hf_tokenizer.py rename to 
src/marqo/core/inference/embedding_models/hf_tokenizer.py diff --git a/src/marqo/core/inference/inference_models/hugging_face_model.py b/src/marqo/core/inference/embedding_models/hugging_face_model.py similarity index 98% rename from src/marqo/core/inference/inference_models/hugging_face_model.py rename to src/marqo/core/inference/embedding_models/hugging_face_model.py index a5ab03844..614c9268d 100644 --- a/src/marqo/core/inference/inference_models/hugging_face_model.py +++ b/src/marqo/core/inference/embedding_models/hugging_face_model.py @@ -11,8 +11,8 @@ from marqo import marqo_docs from marqo.core.inference.model_download import download_model -from marqo.core.inference.inference_models.abstract_embedding_model import AbstractEmbeddingModel -from marqo.core.inference.inference_models.hugging_face_model_properties import HuggingFaceModelProperties, PoolingMethod +from marqo.core.inference.embedding_models.abstract_embedding_model import AbstractEmbeddingModel +from marqo.core.inference.embedding_models.hugging_face_model_properties import HuggingFaceModelProperties, PoolingMethod from marqo.s2_inference.configs import ModelCache from marqo.s2_inference.errors import InvalidModelPropertiesError from marqo.s2_inference.types import Union, FloatTensor, List diff --git a/src/marqo/core/inference/inference_models/hugging_face_model_properties.py b/src/marqo/core/inference/embedding_models/hugging_face_model_properties.py similarity index 98% rename from src/marqo/core/inference/inference_models/hugging_face_model_properties.py rename to src/marqo/core/inference/embedding_models/hugging_face_model_properties.py index 51588a1da..a5f42612a 100644 --- a/src/marqo/core/inference/inference_models/hugging_face_model_properties.py +++ b/src/marqo/core/inference/embedding_models/hugging_face_model_properties.py @@ -7,7 +7,7 @@ from huggingface_hub.utils import HfHubHTTPError from pydantic import Field, validator, root_validator -from marqo.core.inference.inference_models.marqo_base_model_properties import MarqoBaseModelProperties +from marqo.core.inference.embedding_models.marqo_base_model_properties import MarqoBaseModelProperties from marqo.s2_inference.configs import ModelCache from marqo.s2_inference.logger import get_logger from marqo.tensor_search.models.private_models import ModelLocation diff --git a/src/marqo/core/inference/inference_models/image_download.py b/src/marqo/core/inference/embedding_models/image_download.py similarity index 100% rename from src/marqo/core/inference/inference_models/image_download.py rename to src/marqo/core/inference/embedding_models/image_download.py diff --git a/src/marqo/core/inference/inference_models/marqo_base_model_properties.py b/src/marqo/core/inference/embedding_models/marqo_base_model_properties.py similarity index 100% rename from src/marqo/core/inference/inference_models/marqo_base_model_properties.py rename to src/marqo/core/inference/embedding_models/marqo_base_model_properties.py diff --git a/src/marqo/core/inference/inference_models/open_clip_model.py b/src/marqo/core/inference/embedding_models/open_clip_model.py similarity index 98% rename from src/marqo/core/inference/inference_models/open_clip_model.py rename to src/marqo/core/inference/embedding_models/open_clip_model.py index 806551abb..edb17b3d4 100644 --- a/src/marqo/core/inference/inference_models/open_clip_model.py +++ b/src/marqo/core/inference/embedding_models/open_clip_model.py @@ -8,9 +8,9 @@ from torchvision.transforms import Compose from marqo import marqo_docs -from 
marqo.core.inference.inference_models.abstract_clip_model import AbstractCLIPModel -from marqo.core.inference.inference_models.hf_tokenizer import HFTokenizer -from marqo.core.inference.inference_models.open_clip_model_properties import OpenCLIPModelProperties, ImagePreprocessor +from marqo.core.inference.embedding_models.abstract_clip_model import AbstractCLIPModel +from marqo.core.inference.embedding_models.hf_tokenizer import HFTokenizer +from marqo.core.inference.embedding_models.open_clip_model_properties import OpenCLIPModelProperties, ImagePreprocessor from marqo.core.inference.model_download import download_model from marqo.exceptions import InternalError from marqo.s2_inference.configs import ModelCache diff --git a/src/marqo/core/inference/inference_models/open_clip_model_properties.py b/src/marqo/core/inference/embedding_models/open_clip_model_properties.py similarity index 98% rename from src/marqo/core/inference/inference_models/open_clip_model_properties.py rename to src/marqo/core/inference/embedding_models/open_clip_model_properties.py index 9eb0822d0..f609dcd6c 100644 --- a/src/marqo/core/inference/inference_models/open_clip_model_properties.py +++ b/src/marqo/core/inference/embedding_models/open_clip_model_properties.py @@ -4,7 +4,7 @@ from pydantic import Field, root_validator -from marqo.core.inference.inference_models.marqo_base_model_properties import MarqoBaseModelProperties +from marqo.core.inference.embedding_models.marqo_base_model_properties import MarqoBaseModelProperties from marqo.tensor_search.models.private_models import ModelLocation diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index cffa82d3d..342e6d849 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -12,24 +12,19 @@ import validators from PIL import Image, UnidentifiedImageError from multilingual_clip import pt_multilingual_clip -from open_clip.pretrained import _pcfg, _slpcfg, _apcfg -from open_clip.transform import image_transform_v2, PreprocessCfg, merge_preprocess_dict from requests.utils import requote_uri from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize from torchvision.transforms import InterpolationMode from marqo import marqo_docs from marqo.api.exceptions import InternalError -from marqo.core.inference.inference_models.abstract_clip_model import AbstractCLIPModel -from marqo.core.inference.inference_models.open_clip_model_properties import OpenCLIPModelProperties, ImagePreprocessor +from marqo.core.inference.model_download import download_model from marqo.s2_inference.configs import ModelCache from marqo.s2_inference.errors import InvalidModelPropertiesError, ImageDownloadError from marqo.s2_inference.logger import get_logger -from marqo.core.inference.inference_models.hf_tokenizer import HFTokenizer -from marqo.core.inference.model_download import download_model from marqo.s2_inference.types import * from marqo.tensor_search.enums import ModelProperties, InferenceParams -from marqo.tensor_search.models.private_models import ModelAuth, ModelLocation +from marqo.tensor_search.models.private_models import ModelLocation from marqo.tensor_search.telemetry import RequestMetrics logger = get_logger(__name__) diff --git a/src/marqo/s2_inference/model_registry.py b/src/marqo/s2_inference/model_registry.py index 80019fe30..ba78eb8b9 100644 --- a/src/marqo/s2_inference/model_registry.py +++ b/src/marqo/s2_inference/model_registry.py @@ -1,7 +1,7 @@ from 
marqo.s2_inference.clip_utils import CLIP, MULTILINGUAL_CLIP, FP16_CLIP, \ get_multilingual_clip_properties -from marqo.core.inference.inference_models.open_clip_model import OPEN_CLIP -from marqo.core.inference.inference_models.hugging_face_model import HuggingFaceModel +from marqo.core.inference.embedding_models.open_clip_model import OPEN_CLIP +from marqo.core.inference.embedding_models.hugging_face_model import HuggingFaceModel from marqo.s2_inference.onnx_clip_utils import CLIP_ONNX from marqo.s2_inference.random_utils import Random from marqo.s2_inference.sbert_onnx_utils import SBERT_ONNX diff --git a/src/marqo/s2_inference/s2_inference.py b/src/marqo/s2_inference/s2_inference.py index 23a242a25..fc97d5300 100644 --- a/src/marqo/s2_inference/s2_inference.py +++ b/src/marqo/s2_inference/s2_inference.py @@ -15,7 +15,7 @@ from marqo import marqo_docs from marqo.api.exceptions import ModelCacheManagementError, ConfigurationError, InternalError from marqo.s2_inference import constants -from marqo.core.inference.inference_models.open_clip_model import OPEN_CLIP +from marqo.core.inference.embedding_models.open_clip_model import OPEN_CLIP from marqo.s2_inference.clip_utils import CLIP from marqo.s2_inference.configs import get_default_normalization, get_default_seq_length from marqo.s2_inference.errors import ( diff --git a/tests/core/inference/test_corrupt_file_error_handling.py b/tests/core/inference/test_corrupt_file_error_handling.py index fae20c65b..de459cc45 100644 --- a/tests/core/inference/test_corrupt_file_error_handling.py +++ b/tests/core/inference/test_corrupt_file_error_handling.py @@ -4,7 +4,7 @@ import pytest -from marqo.core.inference.inference_models.hugging_face_model import HuggingFaceModel +from marqo.core.inference.embedding_models.hugging_face_model import HuggingFaceModel from marqo.s2_inference.errors import InvalidModelPropertiesError from marqo.s2_inference.s2_inference import _load_model @@ -62,7 +62,7 @@ def test_corrupted_file_handling(self, mock_os_remove, mock_create_model_and_tra """Ensure that a proper error is raised when a corrupted file is encountered. 
The file should be removed.""" mock_create_model_and_transforms.side_effect = RuntimeError("The file might be corrupted") for model_properties in self.dummpy_model_properties: - with patch("marqo.core.inference.inference_models.open_clip_model.download_model", + with patch("marqo.core.inference.embedding_models.open_clip_model.download_model", return_value = self.dummpy_corrupted_file): with self.assertRaises(InvalidModelPropertiesError) as context: _ = _load_model(**self.load_parameters, model_properties=model_properties) @@ -79,7 +79,7 @@ def test_file_removal_failure_handling(self, mock_os_remove, mock_create_model_a # Setup mock_create_model_and_transforms.side_effect = RuntimeError("The file might be corrupted") mock_os_remove.side_effect = OSError("Permission denied") - with patch("marqo.core.inference.inference_models.open_clip_model.download_model", + with patch("marqo.core.inference.embedding_models.open_clip_model.download_model", return_value = self.dummpy_corrupted_file): for model_properties in self.dummpy_model_properties: # Execute and Verify @@ -98,7 +98,7 @@ def test_file_removal_failure_handling(self, mock_os_remove, mock_create_model_a def test_other_errors_handling(self, mock_os_remove, mock_create_model_and_transforms): # Setup mock_create_model_and_transforms.side_effect = Exception("An error occurred") - with patch("marqo.core.inference.inference_models.open_clip_model.download_model", + with patch("marqo.core.inference.embedding_models.open_clip_model.download_model", return_value = self.dummpy_corrupted_file): for model_properties in self.dummpy_model_properties: # Execute and Verify @@ -113,7 +113,7 @@ def test_load_clip_into_open_clip_errors_handling(self, mock_os_remove, mock_cre # Setup mock_create_model_and_transforms.side_effect = Exception( "This could be because the operator doesn't exist for this backend") - with patch("marqo.core.inference.inference_models.open_clip_model.download_model", + with patch("marqo.core.inference.embedding_models.open_clip_model.download_model", return_value = self.dummpy_corrupted_file): for model_properties in self.dummpy_model_properties: # Execute and Verify @@ -207,7 +207,7 @@ def test_regular_file(self): with patch('os.path.isfile', return_value=True), \ patch('os.path.splitext', return_value=('/path/to/file', '.txt')), \ patch('os.makedirs'), \ - patch("marqo.core.inference.inference_models.hugging_face_model.download_model", return_value = "/path/to/file.txt"): + patch("marqo.core.inference.embedding_models.hugging_face_model.download_model", return_value = "/path/to/file.txt"): for model_properties in self.dummy_model_properties: with self.assertRaises(RuntimeError) as context: _ = _load_model(**self.load_parameters, model_properties=model_properties) @@ -218,7 +218,7 @@ def test_zip_file(self): patch('os.path.splitext', return_value=('/path/to/file', '.zip')), \ patch('os.makedirs') as mock_makedirs, \ patch('zipfile.ZipFile') as mock_zipfile, \ - patch("marqo.core.inference.inference_models.hugging_face_model.download_model", return_value="/path/to/file.zip"),\ + patch("marqo.core.inference.embedding_models.hugging_face_model.download_model", return_value="/path/to/file.zip"),\ patch("transformers.AutoModel.from_pretrained") as mock_model,\ patch("transformers.AutoTokenizer.from_pretrained") as mock_tokenizer: @@ -240,7 +240,7 @@ def test_tar_file(self): patch('os.path.splitext', return_value=('/path/to/file', '.tar')), \ patch('os.makedirs') as mock_makedirs, \ patch('tarfile.open') as mock_tarfile,\ - 
patch("marqo.core.inference.inference_models.hugging_face_model.download_model", return_value="/path/to/file.tar"), \ + patch("marqo.core.inference.embedding_models.hugging_face_model.download_model", return_value="/path/to/file.tar"), \ patch("transformers.AutoModel.from_pretrained") as mock_model, \ patch("transformers.AutoTokenizer.from_pretrained") as mock_tokenizer: @@ -259,7 +259,7 @@ def test_tar_file(self): def test_directory(self): with patch('os.path.isfile', return_value=False),\ - patch("marqo.core.inference.inference_models.hugging_face_model.download_model", return_value="/path/to/file.tar"), \ + patch("marqo.core.inference.embedding_models.hugging_face_model.download_model", return_value="/path/to/file.tar"), \ patch("transformers.AutoModel.from_pretrained") as mock_model, \ patch("transformers.AutoTokenizer.from_pretrained") as mock_tokenizer: self.assertEqual(HuggingFaceModel.extract_huggingface_archive('/path/to/directory'), '/path/to/directory') diff --git a/tests/core/inference/test_hugging_face_model.py b/tests/core/inference/test_hugging_face_model.py index a62e25f8c..aad11ae6e 100644 --- a/tests/core/inference/test_hugging_face_model.py +++ b/tests/core/inference/test_hugging_face_model.py @@ -3,8 +3,8 @@ import numpy as np -from marqo.core.inference.inference_models.hugging_face_model import HuggingFaceModel -from marqo.core.inference.inference_models.hugging_face_model_properties import PoolingMethod +from marqo.core.inference.embedding_models.hugging_face_model import HuggingFaceModel +from marqo.core.inference.embedding_models.hugging_face_model_properties import PoolingMethod from marqo.s2_inference.errors import InvalidModelPropertiesError diff --git a/tests/core/inference/test_hugging_face_model_properties.py b/tests/core/inference/test_hugging_face_model_properties.py index 16924b7a4..8c6e2a8cc 100644 --- a/tests/core/inference/test_hugging_face_model_properties.py +++ b/tests/core/inference/test_hugging_face_model_properties.py @@ -4,7 +4,7 @@ import pytest from pydantic import ValidationError -from marqo.core.inference.inference_models.hugging_face_model_properties import HuggingFaceModelProperties, \ +from marqo.core.inference.embedding_models.hugging_face_model_properties import HuggingFaceModelProperties, \ PoolingMethod from marqo.tensor_search.models.external_apis.hf import HfModelLocation from marqo.tensor_search.models.private_models import ModelLocation @@ -110,7 +110,7 @@ def test_invalid_model_with_url_and_model_location(self): def test_infer_pooling_method(self): for pooling_method in (PoolingMethod.Mean, PoolingMethod.CLS): with self.subTest(f"Pooling method inferred from name with {pooling_method}"): - with mock.patch("marqo.core.inference.inference_models.hugging_face_model_properties." + with mock.patch("marqo.core.inference.embedding_models.hugging_face_model_properties." "HuggingFaceModelProperties._infer_pooling_method_from_name", return_value=pooling_method) as mock_infer: model = HuggingFaceModelProperties(name="model-with-cls", type="hf", dimensions=768) @@ -118,7 +118,7 @@ def test_infer_pooling_method(self): self.assertEqual(pooling_method, model.pooling_method) def test_explicit_valid_pooling_method(self): - with mock.patch("marqo.core.inference.inference_models.hugging_face_model_properties." + with mock.patch("marqo.core.inference.embedding_models.hugging_face_model_properties." 
"HuggingFaceModelProperties._infer_pooling_method_from_name") as mock_infer: model = HuggingFaceModelProperties(name="test-model", type="hf", pooling_method=PoolingMethod.CLS, dimensions=768) diff --git a/tests/core/inference/test_marqo_fashion_clip.py b/tests/core/inference/test_marqo_fashion_clip.py index 2e25814b8..28bcb747c 100644 --- a/tests/core/inference/test_marqo_fashion_clip.py +++ b/tests/core/inference/test_marqo_fashion_clip.py @@ -2,7 +2,7 @@ import numpy as np -from marqo.core.inference.inference_models.open_clip_model import OPEN_CLIP +from marqo.core.inference.embedding_models.open_clip_model import OPEN_CLIP from marqo.s2_inference.model_registry import _get_open_clip_properties from marqo.s2_inference.s2_inference import clear_loaded_models diff --git a/tests/core/inference/test_open_clip_model_load.py b/tests/core/inference/test_open_clip_model_load.py index 29af32b4f..7ecd6bb80 100644 --- a/tests/core/inference/test_open_clip_model_load.py +++ b/tests/core/inference/test_open_clip_model_load.py @@ -3,7 +3,7 @@ import pytest -from marqo.core.inference.inference_models.open_clip_model import OPEN_CLIP +from marqo.core.inference.embedding_models.open_clip_model import OPEN_CLIP from marqo.s2_inference.configs import ModelCache from marqo.s2_inference.errors import InvalidModelPropertiesError from marqo.s2_inference.model_registry import _get_open_clip_properties @@ -33,9 +33,9 @@ def test_load_OpenCLIPModelFromCheckPointMethod_success(self): "dimensions": 512 } - with patch("marqo.core.inference.inference_models.open_clip_model.OPEN_CLIP._load_model_and_image_preprocessor_from_checkpoint", \ + with patch("marqo.core.inference.embedding_models.open_clip_model.OPEN_CLIP._load_model_and_image_preprocessor_from_checkpoint", \ return_value=(MagicMock(), MagicMock())) as mock_load_method: - with patch("marqo.core.inference.inference_models.open_clip_model.OPEN_CLIP._load_tokenizer_from_checkpoint", + with patch("marqo.core.inference.embedding_models.open_clip_model.OPEN_CLIP._load_tokenizer_from_checkpoint", return_value=MagicMock()) as mock_load_tokenizer: with patch.object(MagicMock(), 'eval', return_value=None) as mock_eval: model = OPEN_CLIP(model_properties=model_properties, device="cpu") @@ -52,11 +52,11 @@ def test_load_OpenCLIPModelFromCheckPointParameters_success(self): "type": "open_clip", "dimensions": 512 } - with patch("marqo.core.inference.inference_models.open_clip_model.open_clip.create_model", return_value=MagicMock()) \ + with patch("marqo.core.inference.embedding_models.open_clip_model.open_clip.create_model", return_value=MagicMock()) \ as mock_create_model: - with patch("marqo.core.inference.inference_models.open_clip_model.open_clip.get_tokenizer", return_value=MagicMock()) \ + with patch("marqo.core.inference.embedding_models.open_clip_model.open_clip.get_tokenizer", return_value=MagicMock()) \ as mock_tokenizer: - with patch("marqo.core.inference.inference_models.open_clip_model.download_model", return_value="my_test_model.pt"): + with patch("marqo.core.inference.embedding_models.open_clip_model.download_model", return_value="my_test_model.pt"): with patch.object(MagicMock(), 'eval', return_value=None) as mock_eval: model = OPEN_CLIP(model_properties=model_properties, device="cpu") model.load() @@ -88,11 +88,11 @@ def test_load_OpenCLIPModelFromCheckPointPreprocessConfig(self): "image_preprocessor": "SigLIP", "size": 322 # Override the default size 224 } - with patch("marqo.core.inference.inference_models.open_clip_model.open_clip.create_model", 
return_value=MagicMock()) \ + with patch("marqo.core.inference.embedding_models.open_clip_model.open_clip.create_model", return_value=MagicMock()) \ as mock_create_model: - with patch("marqo.core.inference.inference_models.open_clip_model.open_clip.get_tokenizer", return_value=MagicMock()) \ + with patch("marqo.core.inference.embedding_models.open_clip_model.open_clip.get_tokenizer", return_value=MagicMock()) \ as mock_tokenizer: - with patch("marqo.core.inference.inference_models.open_clip_model.download_model", return_value="my_test_model.pt"): + with patch("marqo.core.inference.embedding_models.open_clip_model.download_model", return_value="my_test_model.pt"): with patch.object(MagicMock(), 'eval', return_value=None) as mock_eval: model = OPEN_CLIP(model_properties=model_properties, device="cpu") model.load() @@ -199,12 +199,12 @@ def test_load_OpenCLIPModel_from_local_path(self): "dimensions": 512, "type": "open_clip" } - with patch("marqo.core.inference.inference_models.open_clip_model.open_clip.create_model", return_value=MagicMock()) \ + with patch("marqo.core.inference.embedding_models.open_clip_model.open_clip.create_model", return_value=MagicMock()) \ as mock_create_model: - with patch("marqo.core.inference.inference_models.open_clip_model.open_clip.get_tokenizer", return_value=MagicMock()) \ + with patch("marqo.core.inference.embedding_models.open_clip_model.open_clip.get_tokenizer", return_value=MagicMock()) \ as mock_tokenizer: with patch.object(MagicMock(), 'eval', return_value=None) as mock_eval: - with patch("marqo.core.inference.inference_models.open_clip_model.os.path.exists", + with patch("marqo.core.inference.embedding_models.open_clip_model.os.path.exists", return_value=True) as mock_path_exists: model = OPEN_CLIP(model_properties=model_properties, device="cpu") model.load() @@ -238,7 +238,7 @@ def test_load_OpenCLIPModel_with_auth_s3(self): aws_secret_access_key="my_secret_key", )) - with patch("marqo.core.inference.inference_models.open_clip_model.download_model") as mock_download_model: + with patch("marqo.core.inference.embedding_models.open_clip_model.download_model") as mock_download_model: # It's ok to return a RuntimeError as we are testing the download_model function with self.assertRaises(RuntimeError): model = OPEN_CLIP(model_properties=model_properties, device="cpu", model_auth=model_auth) @@ -267,7 +267,7 @@ def test_load_OpenCLIPModel_with_auth_hf(self): model_auth = ModelAuth(**{"hf": {"token":"my_hf_token"}}) - with patch("marqo.core.inference.inference_models.open_clip_model.download_model") as mock_download_model: + with patch("marqo.core.inference.embedding_models.open_clip_model.download_model") as mock_download_model: # It's ok to return a RuntimeError as we are testing the download_model function with self.assertRaises(RuntimeError) as e: model = OPEN_CLIP(model_properties=model_properties, device="cpu", model_auth=model_auth) diff --git a/tests/core/inference/test_open_clip_model_properties.py b/tests/core/inference/test_open_clip_model_properties.py index 9c43340dc..da17315e5 100644 --- a/tests/core/inference/test_open_clip_model_properties.py +++ b/tests/core/inference/test_open_clip_model_properties.py @@ -2,7 +2,7 @@ import pytest -from marqo.core.inference.inference_models.open_clip_model_properties import OpenCLIPModelProperties +from marqo.core.inference.embedding_models.open_clip_model_properties import OpenCLIPModelProperties @pytest.mark.unittest diff --git a/tests/s2_inference/test_clip_utils.py b/tests/s2_inference/test_clip_utils.py 
index b4d9912f2..8f706f81d 100644 --- a/tests/s2_inference/test_clip_utils.py +++ b/tests/s2_inference/test_clip_utils.py @@ -9,7 +9,7 @@ from marqo.api.exceptions import InternalError from marqo.s2_inference import clip_utils, types from marqo.s2_inference.clip_utils import CLIP, FP16_CLIP, MULTILINGUAL_CLIP -from marqo.core.inference.inference_models.open_clip_model import OPEN_CLIP +from marqo.core.inference.embedding_models.open_clip_model import OPEN_CLIP from marqo.s2_inference.configs import ModelCache from marqo.s2_inference.errors import ImageDownloadError from marqo.tensor_search.enums import ModelProperties diff --git a/tests/tensor_search/test_model_auth.py b/tests/tensor_search/test_model_auth.py index 049574ec0..3e99b0f07 100644 --- a/tests/tensor_search/test_model_auth.py +++ b/tests/tensor_search/test_model_auth.py @@ -14,7 +14,7 @@ from marqo.api.exceptions import BadRequestError, ModelNotInCacheError from marqo.api.exceptions import InvalidArgError, IndexNotFoundError from marqo.core.inference.download_model_from_s3 import get_s3_model_absolute_cache_path -from marqo.core.inference.inference_models.hugging_face_model import HuggingFaceModel +from marqo.core.inference.embedding_models.hugging_face_model import HuggingFaceModel from marqo.core.inference.model_download import download_pretrained_from_url from marqo.core.models.add_docs_params import AddDocsParams from marqo.s2_inference.configs import ModelCache @@ -1240,7 +1240,7 @@ def test_1_load_model_from_hf_zip_file_with_auth_search(self): with unittest.mock.patch('transformers.AutoModel.from_pretrained', mock_automodel_from_pretrained): with unittest.mock.patch('transformers.AutoTokenizer.from_pretrained', mock_autotokenizer_from_pretrained): with unittest.mock.patch('marqo.s2_inference.model_downloading.from_hf.hf_hub_download', mock_hf_hub_download): - with unittest.mock.patch("marqo.core.inference.inference_models.hugging_face_model.HuggingFaceModel." + with unittest.mock.patch("marqo.core.inference.embedding_models.hugging_face_model.HuggingFaceModel." "extract_huggingface_archive", mock_extract_huggingface_archive): try: res = tensor_search.search( @@ -1301,7 +1301,7 @@ def test_2_load_model_from_hf_zip_file_without_auth_search(self): with unittest.mock.patch('transformers.AutoModel.from_pretrained', mock_automodel_from_pretrained): with unittest.mock.patch('transformers.AutoTokenizer.from_pretrained', mock_autotokenizer_from_pretrained): with unittest.mock.patch('marqo.s2_inference.model_downloading.from_hf.hf_hub_download', mock_hf_hub_download): - with unittest.mock.patch("marqo.core.inference.inference_models.hugging_face_model.HuggingFaceModel." + with unittest.mock.patch("marqo.core.inference.embedding_models.hugging_face_model.HuggingFaceModel." "extract_huggingface_archive", mock_extract_huggingface_archive): try: res = tensor_search.search( @@ -1373,7 +1373,7 @@ def test_3_load_model_from_s3_zip_file_with_auth_search(self): with unittest.mock.patch('transformers.AutoTokenizer.from_pretrained',mock_autotokenizer_from_pretrained): with unittest.mock.patch('boto3.client', return_value=mock_s3_client) as mock_boto3_client: with unittest.mock.patch("marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_url", mock_download_pretrained_from_url): - with unittest.mock.patch("marqo.core.inference.inference_models.hugging_face_model.HuggingFaceModel." + with unittest.mock.patch("marqo.core.inference.embedding_models.hugging_face_model.HuggingFaceModel." 
"extract_huggingface_archive", mock_extract_huggingface_archive): try: res = tensor_search.search( @@ -1428,7 +1428,7 @@ def test_4_load_model_from_public_url_zip_file_search(self): with mock.patch('transformers.AutoModel.from_pretrained', new=mock_automodel_from_pretrained): with mock.patch('marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_url', new=mock_download): - with mock.patch("marqo.core.inference.inference_models.hugging_face_model.HuggingFaceModel." + with mock.patch("marqo.core.inference.embedding_models.hugging_face_model.HuggingFaceModel." "extract_huggingface_archive", new=mock_extract_huggingface_archive): res = tensor_search.search(config=self.config, text='hello', index_name=self.index_name_1) @@ -1575,7 +1575,7 @@ def test_1_load_model_from_hf_zip_file_with_auth_add_documents(self): with unittest.mock.patch('transformers.AutoModel.from_pretrained', mock_automodel_from_pretrained): with unittest.mock.patch('transformers.AutoTokenizer.from_pretrained', mock_autotokenizer_from_pretrained): with unittest.mock.patch('marqo.s2_inference.model_downloading.from_hf.hf_hub_download', mock_hf_hub_download): - with unittest.mock.patch("marqo.core.inference.inference_models.hugging_face_model.HuggingFaceModel." + with unittest.mock.patch("marqo.core.inference.embedding_models.hugging_face_model.HuggingFaceModel." "extract_huggingface_archive", mock_extract_huggingface_archive): try: self.add_documents(config=self.config, add_docs_params=AddDocsParams( @@ -1636,7 +1636,7 @@ def test_2_load_model_from_hf_zip_file_without_auth_add_documents(self): with unittest.mock.patch('transformers.AutoModel.from_pretrained', mock_automodel_from_pretrained): with unittest.mock.patch('transformers.AutoTokenizer.from_pretrained', mock_autotokenizer_from_pretrained): with unittest.mock.patch('marqo.s2_inference.model_downloading.from_hf.hf_hub_download', mock_hf_hub_download): - with unittest.mock.patch("marqo.core.inference.inference_models.hugging_face_model.HuggingFaceModel." + with unittest.mock.patch("marqo.core.inference.embedding_models.hugging_face_model.HuggingFaceModel." "extract_huggingface_archive", mock_extract_huggingface_archive): try: self.add_documents(config=self.config, add_docs_params=AddDocsParams( @@ -1708,7 +1708,7 @@ def test_3_load_model_from_s3_zip_file_with_auth_add_documents(self): with unittest.mock.patch( "marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_url", mock_download_pretrained_from_url): - with unittest.mock.patch("marqo.core.inference.inference_models.hugging_face_model.HuggingFaceModel." + with unittest.mock.patch("marqo.core.inference.embedding_models.hugging_face_model.HuggingFaceModel." "extract_huggingface_archive", mock_extract_huggingface_archive): try: @@ -1766,7 +1766,7 @@ def test_4_load_model_from_public_url_zip_file_add_documents(self): with mock.patch('transformers.AutoModel.from_pretrained', new=mock_automodel_from_pretrained): with mock.patch('marqo.s2_inference.processing.custom_clip_utils.download_pretrained_from_url', new=mock_download): - with mock.patch("marqo.core.inference.inference_models.hugging_face_model.HuggingFaceModel." + with mock.patch("marqo.core.inference.embedding_models.hugging_face_model.HuggingFaceModel." "extract_huggingface_archive", new=mock_extract_huggingface_archive): self.add_documents(config=self.config, add_docs_params=AddDocsParams( index_name=self.index_name_1, auto_refresh=True, docs=[{'a': 'b'}], device="cpu"))