
Commit 0cbe9b6

Add new models nvidia, gte, linq
1 parent 19aefa3 commit 0cbe9b6

20 files changed, +315 -71 lines changed

mteb/models/arctic_models.py (+1 -1)

@@ -5,7 +5,7 @@
 from mteb.model_meta import ModelMeta, sentence_transformers_loader
 
 arctic_m_v1_5 = ModelMeta(
-    loader=partial(
+    loader=partial(  # type: ignore
         sentence_transformers_loader,
         model_name="Snowflake/snowflake-arctic-embed-m-v1.5",
         revision="97eab2e17fcb7ccb8bb94d6e547898fa1a6a0f47",

mteb/models/bge_models.py (+3 -3)

@@ -7,7 +7,7 @@
 model_prompts = {"query": "Represent this sentence for searching relevant passages: "}
 
 bge_small_en_v1_5 = ModelMeta(
-    loader=partial(
+    loader=partial(  # type: ignore
         sentence_transformers_loader,
         model_name="BAAI/bge-small-en-v1.5",
         revision="5c38ec7c405ec4b44b94cc5a9bb96e735b38267a",
@@ -30,7 +30,7 @@
 )
 
 bge_base_en_v1_5 = ModelMeta(
-    loader=partial(
+    loader=partial(  # type: ignore
         sentence_transformers_loader,
         model_name="BAAI/bge-base-en-v1.5",
         revision="a5beb1e3e68b9ab74eb54cfd186867f64f240e1a",
@@ -53,7 +53,7 @@
 )
 
 bge_large_en_v1_5 = ModelMeta(
-    loader=partial(
+    loader=partial(  # type: ignore
         sentence_transformers_loader,
         model_name="BAAI/bge-large-en-v1.5",
         revision="d4aa6901d3a41ba39fb536a557fa166f842b0e09",

mteb/models/cohere_models.py (+2 -2)

@@ -74,7 +74,7 @@ def encode(
 }
 
 cohere_mult_3 = ModelMeta(
-    loader=partial(
+    loader=partial(  # type: ignore
         CohereTextEmbeddingModel,
         model_name="embed-multilingual-v3.0",
         model_prompts=model_prompts,
@@ -95,7 +95,7 @@ def encode(
 )
 
 cohere_eng_3 = ModelMeta(
-    loader=partial(
+    loader=partial(  # type: ignore
         CohereTextEmbeddingModel,
         model_name="embed-multilingual-v3.0",
         model_prompts=model_prompts,

mteb/models/e5_instruct.py (+5 -6)

@@ -12,15 +12,14 @@
 MISTRAL_LANGUAGES = ["eng_Latn", "fra_Latn", "deu_Latn", "ita_Latn", "spa_Latn"]
 
 
-def e5_instruction(instruction: str) -> str:
-    return f"Instruct: {instruction}\nQuery: "
+E5_INSTRUCTION = "Instruct: {instruction}\nQuery: "
 
 
 e5_instruct = ModelMeta(
-    loader=partial(
+    loader=partial(  # type: ignore
         instruct_wrapper,
         model_name_or_path="intfloat/multilingual-e5-large-instruct",
-        instruction_template=e5_instruction,
+        instruction_template=E5_INSTRUCTION,
         attn="cccc",
         pooling_method="mean",
         mode="embedding",
@@ -44,10 +43,10 @@ def e5_instruction(instruction: str) -> str:
 )
 
 e5_mistral = ModelMeta(
-    loader=partial(
+    loader=partial(  # type: ignore
         instruct_wrapper,
         model_name_or_path="intfloat/e5-mistral-7b-instruct",
-        instruction_template=e5_instruction,
+        instruction_template=E5_INSTRUCTION,
         attn="cccc",
         pooling_method="lasttoken",
         mode="embedding",

mteb/models/e5_models.py (+7 -7)

@@ -114,7 +114,7 @@
 }
 
 e5_mult_small = ModelMeta(
-    loader=partial(
+    loader=partial(  # type: ignore
         sentence_transformers_loader,
         model_name="intfloat/multilingual-e5-small",
         revision="fd1525a9fd15316a2d503bf26ab031a61d056e98",
@@ -137,7 +137,7 @@
 )
 
 e5_mult_base = ModelMeta(
-    loader=partial(
+    loader=partial(  # type: ignore
         sentence_transformers_loader,
         model_name="intfloat/multilingual-e5-base",
         model_prompts=model_prompts,
@@ -159,7 +159,7 @@
 )
 
 e5_mult_large = ModelMeta(
-    loader=partial(
+    loader=partial(  # type: ignore
         sentence_transformers_loader,
         model_name="intfloat/multilingual-e5-large",
         revision="ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb",
@@ -182,7 +182,7 @@
 )
 
 e5_eng_small_v2 = ModelMeta(
-    loader=partial(
+    loader=partial(  # type: ignore
         sentence_transformers_loader,
         model_name="intfloat/e5-small-v2",
         model_prompts=model_prompts,
@@ -204,7 +204,7 @@
 )
 
 e5_eng_small = ModelMeta(
-    loader=partial(
+    loader=partial(  # type: ignore
         sentence_transformers_loader,
         model_name="intfloat/e5-small",
         revision="e272f3049e853b47cb5ca3952268c6662abda68f",
@@ -227,7 +227,7 @@
 )
 
 e5_eng_base_v2 = ModelMeta(
-    loader=partial(
+    loader=partial(  # type: ignore
         sentence_transformers_loader,
         model_name="intfloat/e5-base-v2",
         revision="1c644c92ad3ba1efdad3f1451a637716616a20e8",
@@ -250,7 +250,7 @@
 )
 
 e5_eng_large_v2 = ModelMeta(
-    loader=partial(
+    loader=partial(  # type: ignore
         sentence_transformers_loader,
         model_name="intfloat/e5-large-v2",
         revision="b322e09026e4ea05f42beadf4d661fb4e101d311",

mteb/models/google_models.py (+1 -1)

@@ -78,7 +78,7 @@ def encode(
 
 name = "text-embedding-004"
 google_emb_004 = ModelMeta(
-    loader=partial(
+    loader=partial(  # type: ignore
         GoogleTextEmbeddingModel,
         model_name=name,
         model_prompts={

mteb/models/gritlm_models.py (+2 -2)

@@ -17,7 +17,7 @@ def gritlm_instruction(instruction: str = "") -> str:
 
 
 gritlm7b = ModelMeta(
-    loader=partial(
+    loader=partial(  # type: ignore
         instruct_wrapper,
         model_name_or_path="GritLM/GritLM-7B",
         instruction_template=gritlm_instruction,
@@ -40,7 +40,7 @@ def gritlm_instruction(instruction: str = "") -> str:
     use_instructions=True,
 )
 gritlm8x7b = ModelMeta(
-    loader=partial(
+    loader=partial(  # type: ignore
         instruct_wrapper,
         model_name_or_path="GritLM/GritLM-8x7B",
         instruction_template=gritlm_instruction,

mteb/models/gte_models.py (+65 -3)

@@ -2,15 +2,21 @@
 
 from functools import partial
 
+import torch
+
 from mteb.model_meta import ModelMeta
+from mteb.models.instruct_wrapper import instruct_wrapper
+
+
+def instruction_template(instruction: str) -> str:
+    return f"Instruct: {instruction}\nQuery: " if instruction else ""
 
-from .instruct_wrapper import instruct_wrapper
 
 gte_Qwen2_7B_instruct = ModelMeta(
-    loader=partial(
+    loader=partial(  # type: ignore
         instruct_wrapper,
         model_name_or_path="Alibaba-NLP/gte-Qwen2-7B-instruct",
-        instruction_template="Instruct: {instruction}\nQuery: ",
+        instruction_template=instruction_template,
         attn="cccc",
         pooling_method="lasttoken",
         mode="embedding",
@@ -33,3 +39,59 @@
     framework=["Sentence Transformers", "PyTorch"],
     use_instructions=True,
 )
+
+
+gte_Qwen1_5_7B_instruct = ModelMeta(
+    loader=partial(  # type: ignore
+        instruct_wrapper,
+        model_name_or_path="Alibaba-NLP/gte-Qwen1.5-7B-instruct",
+        instruction_template=instruction_template,
+        attn="cccc",
+        pooling_method="lasttoken",
+        mode="embedding",
+        torch_dtype="auto",
+        normalized=True,
+    ),
+    name="Alibaba-NLP/gte-Qwen1.5-7B-instruct",
+    languages=["eng_Latn"],
+    open_weights=True,
+    revision="07d27e5226328010336563bc1b564a5e3436a298",
+    release_date="2024-04-20",  # initial commit of hf model.
+    n_parameters=7_720_000_000,
+    memory_usage=None,
+    embed_dim=4096,
+    license="apache-2.0",
+    max_tokens=32768,
+    reference="https://huggingface.co/Alibaba-NLP/gte-Qwen1.5-7B-instruct",
+    similarity_fn_name="cosine",
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=True,
+)
+
+
+gte_Qwen2_1_5B_instruct = ModelMeta(
+    loader=partial(  # type: ignore
+        instruct_wrapper,
+        model_name_or_path="Alibaba-NLP/gte-Qwen2-1.5B-instruct",
+        instruction_template=instruction_template,
+        attn="cccc",
+        pooling_method="lasttoken",
+        mode="embedding",
+        torch_dtype="auto",
+        normalized=True,
+    ),
+    name="Alibaba-NLP/gte-Qwen2-1.5B-instruct",
+    languages=["eng_Latn"],
+    open_weights=True,
+    revision="c6c1b92f4a3e1b92b326ad29dd3c8433457df8dd",
+    release_date="2024-07-29",  # initial commit of hf model.
+    n_parameters=1_780_000_000,
+    memory_usage=None,
+    embed_dim=8960,
+    license="apache-2.0",
+    max_tokens=131072,
+    reference="https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct",
+    similarity_fn_name="cosine",
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=True,
+)

mteb/models/jina_models.py (+1 -1)

@@ -191,7 +191,7 @@ def encode(
 
 
 jina_embeddings_v3 = ModelMeta(
-    loader=partial(
+    loader=partial(  # type: ignore
         JinaWrapper,
         model="jinaai/jina-embeddings-v3",
         revision="215a6e121fa0183376388ac6b1ae230326bfeaed",

mteb/models/linq_models.py (+40 -0)

@@ -0,0 +1,40 @@
+from __future__ import annotations
+
+from functools import partial
+
+import torch
+
+from mteb.model_meta import ModelMeta
+from mteb.models.instruct_wrapper import instruct_wrapper
+
+
+def instruction_template(instruction: str) -> str:
+    return f"Instruct: {instruction}\nQuery: " if instruction else ""
+
+
+Linq_Embed_Mistral = ModelMeta(
+    loader=partial(  # type: ignore
+        instruct_wrapper,
+        model_name_or_path="Linq-AI-Research/Linq-Embed-Mistral",
+        instruction_template=instruction_template,
+        attn="cccc",
+        pooling_method="lasttoken",
+        mode="embedding",
+        torch_dtype=torch.bfloat16,
+        normalized=True,
+    ),
+    name="Linq-AI-Research/Linq-Embed-Mistral",
+    languages=["eng_Latn"],
+    open_weights=True,
+    revision="0c1a0b0589177079acc552433cad51d7c9132379",
+    release_date="2024-05-29",  # initial commit of hf model.
+    n_parameters=7_110_000_000,
+    memory_usage=None,
+    embed_dim=4096,
+    license="cc-by-nc-4.0",
+    max_tokens=32768,
+    reference="https://huggingface.co/Linq-AI-Research/Linq-Embed-Mistral",
+    similarity_fn_name="cosine",
+    framework=["Sentence Transformers", "PyTorch"],
+    use_instructions=True,
+)
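
Once this ModelMeta is registered, the model should be resolvable by name through mteb's registry. A minimal usage sketch, assuming mteb.get_model, mteb.get_tasks, and mteb.MTEB are available in this version, the host has a bfloat16-capable GPU, and the task choice is illustrative:

import mteb

# Resolves the ModelMeta above and invokes its loader
# (instruct_wrapper, last-token pooling, bfloat16 weights).
model = mteb.get_model("Linq-AI-Research/Linq-Embed-Mistral")

tasks = mteb.get_tasks(tasks=["NFCorpus"])
evaluation = mteb.MTEB(tasks=tasks)
results = evaluation.run(model, output_folder="results")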

mteb/models/mxbai_models.py (+1 -1)

@@ -5,7 +5,7 @@
 from mteb.model_meta import ModelMeta, sentence_transformers_loader
 
 mxbai_embed_large_v1 = ModelMeta(
-    loader=partial(
+    loader=partial(  # type: ignore
         sentence_transformers_loader,
         model_name="mixedbread-ai/mxbai-embed-large-v1",
         revision="990580e27d329c7408b3741ecff85876e128e203",
