
Commit 45af553

Merge branch 'embeddings-benchmark:main' into chemteb
2 parents: 4883f42 + c3b46b7


47 files changed (+1818 -192 lines)
Lines changed: 16 additions & 0 deletions

@@ -0,0 +1,16 @@
+name: Daily Space Rebuild
+on:
+  schedule:
+    # Runs at midnight Pacific Time (8 AM UTC)
+    - cron: '0 8 * * *'
+  workflow_dispatch: # Allows manual triggering
+
+jobs:
+  rebuild:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Trigger Factory Rebuild
+        run: |
+          curl -X POST \
+            "https://huggingface.co/api/spaces/mteb/leaderboard_2_demo/restart?factory=true" \
+            -H "Authorization: Bearer ${{ secrets.HF_TOKEN }}"
Lines changed: 24 additions & 0 deletions

@@ -0,0 +1,24 @@
+name: Model Loading
+
+on:
+  pull_request:
+    paths:
+      - 'mteb/models/**.py'
+
+jobs:
+  extract-and-run:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+          cache: 'pip'
+
+      - name: Install dependencies and run tests
+        run: |
+          make model-load-test

.gitignore

Lines changed: 3 additions & 0 deletions

@@ -145,3 +145,6 @@ tests/create_meta/model_card.md
 # removed results from mteb repo they are now available at: https://github.com/embeddings-benchmark/results
 results/
 uv.lock
+
+# model loading tests
+model_names.txt

Makefile

Lines changed: 8 additions & 1 deletion

@@ -35,4 +35,11 @@ pr:
 build-docs:
 	@echo "--- 📚 Building documentation ---"
 	# since we do not have a documentation site, this just build tables for the .md files
-	python docs/create_tasks_table.py
+	python docs/create_tasks_table.py
+
+
+model-load-test:
+	@echo "--- 🚀 Running model load test ---"
+	pip install ".[dev, speedtask, pylate,gritlm,xformers,model2vec]"
+	python scripts/extract_model_names.py
+	python tests/test_models/model_loading.py --model_name_file scripts/model_names.txt
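The commit view does not show `scripts/extract_model_names.py` itself. A hypothetical sketch of the shape such a script could take, assuming it diffs the branch against `origin/main` (matching the workflow's `mteb/models/**.py` path filter) and writes one name per line to `scripts/model_names.txt`; the actual script may differ:

```python
# Hypothetical sketch only; the real scripts/extract_model_names.py may differ.
import subprocess
from pathlib import Path

# Model files touched relative to main, mirroring the workflow's path filter.
changed = subprocess.run(
    ["git", "diff", "--name-only", "origin/main...HEAD", "--", "mteb/models/"],
    capture_output=True, text=True, check=True,
).stdout.splitlines()

names = [Path(f).stem for f in changed if f.endswith(".py")]
Path("scripts/model_names.txt").write_text("\n".join(names) + "\n")
```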

docs/adding_a_model.md

Lines changed: 4 additions & 4 deletions

@@ -30,10 +30,10 @@ These will save the results in a folder called `results/{model_name}/{model_revision}`
 
 2. **Push Results to the Leaderboard**
 
-To add results to the public leaderboard you can push your results to the [results repository](https://github.com/embeddings-benchmark/results) afterwards they will appear on the leaderboard after a day.
+To add results to the public leaderboard you can push your results to the [results repository](https://github.com/embeddings-benchmark/results) via a PR. Once merged they will appear on the leaderboard after a day.
 
 
-3. (Optional) **Add the results using to the model card:**
+3. (Optional) **Add results to the model card:**
 
 `mteb` implements a cli for adding results to the model card:
 
@@ -49,7 +49,7 @@ If the readme already exists:
 mteb create_meta --results_folder results/{model_name}/{model_revision} --output_path model_card.md --from_existing your_existing_readme.md
 ```
 
-Note that if you can run the model on many tasks, this can lead to an excessively large readme frontmatter.
+Note that running the model on many tasks may lead to a huge readme front matter.
 
 4. **Wait for a refresh the leaderboard:**
 
@@ -70,4 +70,4 @@ The leaderboard [automatically refreshes daily](https://github.com/embeddings-be
 
 ###### Instantiating the Model with Prompts
 
-If you are unable to directly add the prompts in the model configuration, you can instantiate the model using the `sentence_transformers_loader` and pass `prompts` as an argument. For more details, see the `mteb/models/bge_models.py` file.
+If you are unable to directly add the prompts in the model configuration, you can instantiate the model using the `sentence_transformers_loader` and pass `prompts` as an argument. For more details, see the `mteb/models/bge_models.py` file.
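For illustration, a minimal sketch of such a loader-based instantiation. The import path, model name, prompt strings, and the exact keyword accepted by `sentence_transformers_loader` are assumptions here; `mteb/models/bge_models.py` shows the signature actually used:

```python
from functools import partial

# Assumed import path; check mteb/models/bge_models.py for the real one.
from mteb.models.sentence_transformer_wrapper import sentence_transformers_loader

loader = partial(
    sentence_transformers_loader,
    model_name="BAAI/bge-small-en-v1.5",  # illustrative model
    prompts={"query": "Represent this sentence for searching relevant passages: "},
)
model = loader()  # returns the wrapped model ready for mteb evaluation
```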

mteb/evaluation/evaluators/BitextMiningEvaluator.py

Lines changed: 7 additions & 3 deletions

@@ -44,9 +44,13 @@ def __call__(self, model: Encoder, *, encode_kwargs: dict[str, Any] = {}):
 
     def compute_metrics(self, model: Encoder, encode_kwargs: dict[str, Any] = {}):
         pair_elements = {p for pair in self.pairs for p in pair}
-        subsets = [
-            col for col in self.sentences.features.keys() if col in pair_elements
-        ]
+        if isinstance(self.sentences, Dataset):
+            subsets = [
+                col for col in self.sentences.features.keys() if col in pair_elements
+            ]
+        else:
+            # BUCC outputs a dict instead of a Dataset
+            subsets = list(pair_elements)
         n_subsets = len(subsets)
 
         embeddings = {}
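In effect, `compute_metrics` now accepts either a `datasets.Dataset` (whose columns are filtered against the pair elements) or a plain dict as produced by BUCC. A small self-contained sketch of that dispatch, with illustrative column names:

```python
from datasets import Dataset

pairs = [("sentence1", "sentence2")]
pair_elements = {p for pair in pairs for p in pair}

# Dataset case: keep only the columns that participate in a pair.
sentences = Dataset.from_dict({"sentence1": ["a"], "sentence2": ["b"], "id": [0]})
if isinstance(sentences, Dataset):
    subsets = [col for col in sentences.features.keys() if col in pair_elements]
else:
    # dict case (BUCC): fall back to the pair elements themselves
    subsets = list(pair_elements)

print(subsets)  # ['sentence1', 'sentence2']
```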

mteb/evaluation/evaluators/RetrievalEvaluator.py

Lines changed: 12 additions & 7 deletions

@@ -167,12 +167,12 @@ def search(
                 self.corpus_embeddings[request_qid].append(sub_corpus_embeddings)
 
             # Compute similarites using self defined similarity otherwise default to cosine-similarity
-            similarity_scores = cos_sim(query_embeddings, sub_corpus_embeddings)
             if hasattr(self.model, "similarity"):
                 similarity_scores = self.model.similarity(
-                    float(self.model.similarity(e1, e2))
-                    for e1, e2 in zip(query_embeddings, sub_corpus_embeddings)
+                    query_embeddings, sub_corpus_embeddings
                 )
+            else:
+                similarity_scores = cos_sim(query_embeddings, sub_corpus_embeddings)
             is_nan = torch.isnan(similarity_scores)
             if is_nan.sum() > 0:
                 logger.warning(
@@ -307,15 +307,17 @@ def search_cross_encoder(
             assert (
                 len(queries_in_pair) == len(corpus_in_pair) == len(instructions_in_pair)
             )
+            corpus_in_pair = corpus_to_str(list(corpus_in_pair))
 
             if hasattr(self.model, "model") and isinstance(
                 self.model.model, CrossEncoder
             ):
                 # can't take instructions, so add them here
-                queries_in_pair = [
-                    f"{q} {i}".strip()
-                    for i, q in zip(instructions_in_pair, queries_in_pair)
-                ]
+                if instructions_in_pair[0] is not None:
+                    queries_in_pair = [
+                        f"{q} {i}".strip()
+                        for i, q in zip(instructions_in_pair, queries_in_pair)
+                    ]
                 scores = self.model.predict(list(zip(queries_in_pair, corpus_in_pair)))  # type: ignore
             else:
                 # may use the instructions in a unique way, so give them also
@@ -374,6 +376,9 @@ def __init__(self, model, **kwargs):
         self.save_corpus_embeddings = kwargs.get("save_corpus_embeddings", False)
         self.corpus_embeddings = {}
 
+        if hasattr(self.model, "similarity") and callable(self.model.similarity):
+            self.similarity = self.model.similarity
+
     def encode_corpus(
         self,
         corpus: list[dict[str, str]],
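All three hunks implement one pattern: prefer a similarity function defined by the model, otherwise fall back to cosine similarity. A minimal standalone sketch of that dispatch, where the normalized matrix product stands in for mteb's `cos_sim` helper:

```python
import torch
import torch.nn.functional as F


def score(model, query_embeddings: torch.Tensor, corpus_embeddings: torch.Tensor) -> torch.Tensor:
    """Return an (n_queries, n_corpus) similarity matrix."""
    if hasattr(model, "similarity") and callable(model.similarity):
        # e.g. a SentenceTransformer configured with a dot-product similarity
        return model.similarity(query_embeddings, corpus_embeddings)
    # Fallback: cosine similarity (stand-in for mteb's cos_sim).
    q = F.normalize(query_embeddings, dim=-1)
    c = F.normalize(corpus_embeddings, dim=-1)
    return q @ c.T
```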
