Skip to content

Commit

Permalink
Add new models(FashionCLIP and visheration multilingual clip) from th…
Browse files Browse the repository at this point in the history
…e release 2.12
  • Loading branch information
wanliAlex authored Oct 16, 2024
2 parents e3ad7ea + 31679f4 commit c086822
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""
LanguageBind model properties
This file contains properties for LanguageBind models. It is intended to be used in conjunction with the model registry
and should not be used in isolation.
"""
from typing import Dict, List, Optional

from marqo_commons.model_registry.model_properties_data.onnx_clip_properties import OnnxClipModelProperties
from marqo_commons.model_registry.model_properties_object import ModelProperties, VectorNumericType, Modality, \
ModelType, T
from marqo_commons.model_registry.utils import convert_model_properties_to_dict


class LanguagebindModelProperties(ModelProperties):
    """Registry properties for LanguageBind models.

    Each entry declares the modalities it supports through ``modality``;
    all entries defined here use 768-dimensional vectors.
    """

    vector_numeric_type: VectorNumericType = VectorNumericType.float32
    default_memory_size: float = 8
    # No default on purpose: every entry must state its supported modalities.
    modality: List[Modality]
    type: ModelType = ModelType.languagebind
    # Explicit ``None`` default: no entry below passes ``pretrained``.
    # NOTE(review): presumably ModelProperties is a pydantic model; without
    # an explicit default, Optional fields are only implicitly optional in
    # pydantic v1 and become required in v2 -- confirm.
    pretrained: Optional[str] = None
    notes: str = ""

    @classmethod
    def get_all_model_properties_objects(cls) -> Dict[str, T]:
        """Return every LanguageBind model definition keyed by model name.

        Returns:
            A dict mapping each model name to its
            ``LanguagebindModelProperties`` instance. Each key is identical
            to the ``name`` of the instance it maps to.
        """
        # The suffixes in each key (Video / Audio / Image) mirror the
        # non-text modalities listed for that entry; text is always included.
        # NOTE(review): presumably the loader selects the matching
        # LanguageBind encoders from the name -- verify against the loader.
        return {
            'LanguageBind/Video_V1.5_FT_Audio_FT_Image': LanguagebindModelProperties(
                name="LanguageBind/Video_V1.5_FT_Audio_FT_Image",
                dimensions=768,
                modality=[Modality.video, Modality.audio, Modality.text, Modality.image],
            ),
            'LanguageBind/Video_V1.5_FT_Audio_FT': LanguagebindModelProperties(
                name="LanguageBind/Video_V1.5_FT_Audio_FT",
                dimensions=768,
                modality=[Modality.video, Modality.audio, Modality.text],
            ),
            'LanguageBind/Video_V1.5_FT_Image': LanguagebindModelProperties(
                name="LanguageBind/Video_V1.5_FT_Image",
                dimensions=768,
                modality=[Modality.video, Modality.text, Modality.image],
            ),
            'LanguageBind/Audio_FT_Image': LanguagebindModelProperties(
                name="LanguageBind/Audio_FT_Image",
                dimensions=768,
                modality=[Modality.audio, Modality.text, Modality.image],
            ),
            'LanguageBind/Audio_FT': LanguagebindModelProperties(
                name="LanguageBind/Audio_FT",
                dimensions=768,
                modality=[Modality.audio, Modality.text],
            ),
            'LanguageBind/Video_V1.5_FT': LanguagebindModelProperties(
                name="LanguageBind/Video_V1.5_FT",
                dimensions=768,
                modality=[Modality.video, Modality.text],
            ),
        }


@convert_model_properties_to_dict
def _get_languagebind_properties() -> Dict:
    """Fetch the LanguageBind model definitions for the model registry.

    The mapping built by
    ``LanguagebindModelProperties.get_all_model_properties_objects`` is
    returned unchanged from this function body; the
    ``convert_model_properties_to_dict`` decorator then processes it
    (presumably turning each properties object into a plain dict -- see the
    decorator for the exact output shape).
    """
    languagebind_models = LanguagebindModelProperties.get_all_model_properties_objects()
    return languagebind_models
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
This file contains properties for OpenCLIP models. It is intended to be used in conjunction with the model registry
and should not be used in isolation.
"""
from typing import Dict, List
from typing import Dict, List, Optional

from marqo_commons.model_registry.model_properties_data.onnx_clip_properties import OnnxClipModelProperties
from marqo_commons.model_registry.model_properties_object import ModelProperties, VectorNumericType, Modality, \
Expand All @@ -17,7 +17,7 @@ class OpenClipModelProperties(ModelProperties):
default_memory_size: float = 1.0
modality: List[Modality] = [Modality.text, Modality.image]
type: ModelType = ModelType.open_clip
pretrained: str
pretrained: Optional[str]
notes: str = ""

@classmethod
Expand Down Expand Up @@ -454,8 +454,38 @@ def get_all_model_properties_objects(cls) -> Dict[str, T]:
dimensions=512,
notes="open_clip model: ViT-B-32-256/datacomp_s34b_b86k",
pretrained="datacomp_s34b_b86k"
),
"Marqo/marqo-fashionCLIP": OpenClipModelProperties(
name = "hf-hub:Marqo/marqo-fashionCLIP",
dimensions = 512,
note = "Marqo's fashionCLIP model",
),
"Marqo/marqo-fashionSigLIP": OpenClipModelProperties(
name = "hf-hub:Marqo/marqo-fashionSigLIP",
dimensions = 768,
note = "Marqo's fashionSigLIP model",
),
"visheratin/nllb-clip-base-siglip": OpenClipModelProperties(
name="hf-hub:visheratin/nllb-clip-base-siglip",
dimensions=768,
note="A multilingual CLIP model",
),
"visheratin/nllb-siglip-mrl-base": OpenClipModelProperties(
name="hf-hub:visheratin/nllb-siglip-mrl-base",
dimensions=768,
note="A multilingual CLIP model",
),
"visheratin/nllb-clip-large-siglip": OpenClipModelProperties(
name="hf-hub:visheratin/nllb-clip-large-siglip",
dimensions=1152,
note="A multilingual CLIP model",
),
"visheratin/nllb-siglip-mrl-large": OpenClipModelProperties(
name="hf-hub:visheratin/nllb-siglip-mrl-large",
dimensions=1152,
note="A multilingual CLIP model",
)
}
}


@convert_model_properties_to_dict
Expand Down
3 changes: 3 additions & 0 deletions src/marqo_commons/model_registry/model_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from marqo_commons.model_registry.model_properties_data.onnx_clip_properties import _get_onnx_clip_properties
from marqo_commons.model_registry.model_properties_data.random_properties import _get_random_properties
from marqo_commons.model_registry.model_properties_data.no_model import _get_no_model_properties
from marqo_commons.model_registry.model_properties_data.languagebind_model_properties import _get_languagebind_properties


# we need to keep track of the embed dim and model load functions/classes
Expand All @@ -33,6 +34,7 @@ def get_model_properties_dict() -> Dict:
multilingual_clip_model_properties = _get_multilingual_clip_properties()
fp16_clip_model_properties = _get_fp16_clip_properties()
no_model_properties = _get_no_model_properties()
languagebind_model_properties = _get_languagebind_properties()

# combine the above dicts
model_properties = dict(clip_model_properties.items())
Expand All @@ -46,6 +48,7 @@ def get_model_properties_dict() -> Dict:
model_properties.update(multilingual_clip_model_properties)
model_properties.update(fp16_clip_model_properties)
model_properties.update(no_model_properties)
model_properties.update(languagebind_model_properties)

return model_properties

Expand Down
3 changes: 3 additions & 0 deletions src/marqo_commons/shared_utils/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
class Modality(str, Enum):
image = "image"
text = "text"
audio = "audio"
video = "video"


class ModelType(str, Enum):
Expand All @@ -18,6 +20,7 @@ class ModelType(str, Enum):
sbert = "sbert"
test = "test"
no_model = "no_model"
languagebind = "languagebind"


class VectorNumericType(str, Enum):
Expand Down
2 changes: 2 additions & 0 deletions tests/test_model_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from marqo_commons.model_registry.model_properties_data.sbert_onnx_properties import _get_sbert_onnx_properties
from marqo_commons.model_registry.model_properties_data.sbert_properties import _get_sbert_properties
from marqo_commons.model_registry.model_properties_data.test_properties import _get_sbert_test_properties
from marqo_commons.model_registry.model_properties_data.languagebind_model_properties import _get_languagebind_properties

from marqo_commons.model_registry.model_registry import get_model_properties_dict, get_model_properties_json
from marqo_commons.shared_utils.enums import ModelType
Expand All @@ -38,6 +39,7 @@ def test_models_count(self):
total_count_from_all_model_properties += len(_get_multilingual_clip_properties())
total_count_from_all_model_properties += len(_get_fp16_clip_properties())
total_count_from_all_model_properties += len(_get_no_model_properties())
total_count_from_all_model_properties += len(_get_languagebind_properties())
self.assertEqual(
total_count_from_all_model_properties, total_count_from_model_registry,
"Number of models in get_model_properties_dict is not equal to total "
Expand Down

0 comments on commit c086822

Please sign in to comment.