diff --git a/tests/conftest.py b/tests/conftest.py index 99a04bfb1..292170a7d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,17 +2,20 @@ def pytest_addoption(parser): parser.addoption("--largemodel", action="store_true", default = False) + parser.addoption("--slow", action="store_true", default = False) def pytest_configure(config): config.addinivalue_line("markers", "largemodel: mark test as largemodels") + config.addinivalue_line("markers", "slow: mark test as slow") def pytest_collection_modifyitems(config, items): - if config.getoption("--largemodel"): - # --largemodel given in cli: do not skip largemodel tests - return skip_largemodel = pytest.mark.skip(reason="need --largemodel option to run") + skip_slow = pytest.mark.skip(reason="need --slow option to run") + for item in items: - if "largemodel" in item.keywords: - item.add_marker(skip_largemodel) \ No newline at end of file + if "largemodel" in item.keywords and not config.getoption("--largemodel"): + item.add_marker(skip_largemodel) + if "slow" in item.keywords and not config.getoption("--slow"): + item.add_marker(skip_slow) \ No newline at end of file diff --git a/tests/tensor_search/test_score_modifiers_search.py b/tests/tensor_search/test_score_modifiers_search.py index b4b1877fb..ffaa45f7e 100644 --- a/tests/tensor_search/test_score_modifiers_search.py +++ b/tests/tensor_search/test_score_modifiers_search.py @@ -1,21 +1,41 @@ import copy from unittest import mock +import time +import os +import numpy as np + from tests.utils.transition import add_docs_caller from marqo.errors import IndexNotFoundError, InvalidArgError, BackendCommunicationError from marqo.tensor_search import tensor_search +from marqo.tensor_search.models.add_docs_objects import AddDocsParams from marqo.tensor_search.models.api_models import ScoreModifier from marqo.tensor_search.enums import TensorField, IndexSettingsField, SearchMethod from tests.marqo_test import MarqoTestCase +import pytest from marqo.tensor_search.models.api_models import BulkSearchQuery, BulkSearchQueryEntity import pprint from marqo.tensor_search.tensor_search import (_create_normal_tensor_search_query, _vector_text_search_query_verbose, - _generate_vector_text_search_query_for_verbose_one, + _generate_vector_text_search_query_for_verbose_one, _create_score_modifiers_tensor_search_query) from marqo.tensor_search.models.score_modifiers_object import ScoreModifierOperator -import numpy as np -from pydantic.error_wrappers import ValidationError -import os + +def get_expected_score(doc, ori_score, score_modifiers: ScoreModifier): + add = 0.0 + if score_modifiers.multiply_score_by is not None: + for config in score_modifiers.multiply_score_by: + if config.field_name in doc: + if isinstance(doc[config.field_name], (int, float)): + ori_score = ori_score * config.weight * doc[config.field_name] + + if score_modifiers.add_to_score is not None: + for config in score_modifiers.add_to_score: + if config.field_name in doc: + if isinstance(doc[config.field_name], (int, float)): + add = add + config.weight * doc[config.field_name] + + return max(0.0, (ori_score + add)) + class TestScoreModifiersSearch(MarqoTestCase): @@ -42,10 +62,10 @@ def setUp(self): # miss one weight "multiply_score_by": [{"field_name": "multiply_1", - "weight": 1,}, - {"field_name": "multiply_2",}], + "weight": 1, }, + {"field_name": "multiply_2", }], "add_to_score": [ - {"field_name": "add_1", "weight" : -3, + {"field_name": "add_1", "weight": -3, }, {"field_name": "add_2", "weight": 1, }] @@ -67,8 +87,8 @@ def setUp(self): # remove one field "multiply_score_by": [ - {"field_name": "multiply_2", - "weight": 1.2}], + {"field_name": "multiply_2", + "weight": 1.2}], "add_to_score": [ {"field_name": "add_1", "weight": -3, }, @@ -145,7 +165,7 @@ def setUp(self): # Any tests that call add_document, search, bulk_search need this env var self.device_patcher = mock.patch.dict(os.environ, {"MARQO_BEST_AVAILABLE_DEVICE": "cpu"}) self.device_patcher.start() - + def tearDown(self) -> None: try: tensor_search.delete_index(config=self.config, index_name=self.index_name) @@ -159,87 +179,70 @@ def test_search_result_not_affected_if_fields_not_exist(self): "my_image_field": "https://marqo-assets.s3.amazonaws.com/tests/images/image2.jpg", "_id": "0", "filter": "original" - }, + }, ] add_docs_caller(config=self.config, index_name=self.index_name, docs=documents, - non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", - "filter"], auto_refresh=True) - + non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", + "filter"], auto_refresh=True) normal_res = tensor_search.search(config=self.config, index_name=self.index_name, - text = "what is the rider doing?", score_modifiers=None, result_count=10) + text="what is the rider doing?", score_modifiers=None, result_count=10) normal_score = normal_res["hits"][0]["_score"] modifier_res = tensor_search.search(config=self.config, index_name=self.index_name, - text = "what is the rider doing?", - score_modifiers=ScoreModifier(**{ - "multiply_score_by": - [{"field_name": "multiply_1", - "weight": 1,}, - {"field_name": "multiply_2",} - ], - "add_to_score": [ - {"field_name": "add_1", - }, - {"field_name": "add_2", - } - ] - })) + text="what is the rider doing?", + score_modifiers=ScoreModifier(**{ + "multiply_score_by": + [{"field_name": "multiply_1", + "weight": 1, }, + {"field_name": "multiply_2", } + ], + "add_to_score": [ + {"field_name": "add_1", + }, + {"field_name": "add_2", + } + ] + })) modifier_score = modifier_res["hits"][0]["_score"] self.assertEqual(normal_score, modifier_score) - def get_expected_score(self, doc, ori_score, score_modifiers: ScoreModifier): - add = 0.0 - if score_modifiers.multiply_score_by is not None: - for config in score_modifiers.multiply_score_by: - if config.field_name in doc: - if isinstance(doc[config.field_name], (int, float)): - ori_score = ori_score * config.weight * doc[config.field_name] - - if score_modifiers.add_to_score is not None: - for config in score_modifiers.add_to_score: - if config.field_name in doc: - if isinstance(doc[config.field_name], (int, float)): - add = add + config.weight * doc[config.field_name] - - return max(0.0, (ori_score + add)) - def test_search_score_modified_as_expected(self): documents = self.test_score_documents add_docs_caller(config=self.config, index_name=self.index_name, docs=documents, - non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", - "filter"], auto_refresh=True) + non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", + "filter"], auto_refresh=True) normal_res = tensor_search.search(config=self.config, index_name=self.index_name, text="what is the rider doing?", score_modifiers=None, result_count=10) normal_score = normal_res["hits"][0]["_score"] - epsilon = 1e-5 score_modifiers_list = self.test_valid_score_modifiers_list for score_modifiers in score_modifiers_list: modifier_res = tensor_search.search(config=self.config, index_name=self.index_name, - text = "what is the rider doing?", - score_modifiers=score_modifiers, result_count=10) + text="what is the rider doing?", + score_modifiers=score_modifiers, result_count=10) assert len(modifier_res["hits"]) == len(documents) for result in modifier_res["hits"]: - expected_score = self.get_expected_score(result, normal_score, score_modifiers) + expected_score = get_expected_score(result, normal_score, score_modifiers) if abs(expected_score - result["_score"]) > epsilon: raise AssertionError - + tensor_search.bulk_search(marqo_config=self.config, query=BulkSearchQuery( queries=[ - BulkSearchQueryEntity(index=self.index_name, q="what is the rider doing?", limit=10, scoreModifiers=score_modifiers), + BulkSearchQueryEntity(index=self.index_name, q="what is the rider doing?", limit=10, + scoreModifiers=score_modifiers), ] )) assert len(modifier_res["hits"]) == len(documents) for result in modifier_res["hits"]: - expected_score = self.get_expected_score(result, normal_score, score_modifiers) + expected_score = get_expected_score(result, normal_score, score_modifiers) if abs(expected_score - result["_score"]) > epsilon: raise AssertionError @@ -247,8 +250,8 @@ def test_search_score_modified_as_expected_with_filter(self): documents = self.test_score_documents add_docs_caller(config=self.config, index_name=self.index_name, docs=documents, - non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", - "filter"], auto_refresh=True) + non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", + "filter"], auto_refresh=True) normal_res = tensor_search.search(config=self.config, index_name=self.index_name, text="what is the rider doing?", score_modifiers=None, result_count=10) normal_score = normal_res["hits"][0]["_score"] @@ -264,7 +267,7 @@ def test_search_score_modified_as_expected_with_filter(self): assert len(modifier_res["hits"]) == 1 assert modifier_res["hits"][0]["_id"] == "0" for result in modifier_res["hits"]: - expected_score = self.get_expected_score(result, normal_score, score_modifiers) + expected_score = get_expected_score(result, normal_score, score_modifiers) if abs(expected_score - result["_score"]) > epsilon: raise AssertionError @@ -272,8 +275,8 @@ def test_search_score_modified_as_expected_with_searchable_attributes(self): documents = self.test_score_documents add_docs_caller(config=self.config, index_name=self.index_name, docs=documents, - non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", - "filter"], auto_refresh=True) + non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", + "filter"], auto_refresh=True) normal_res = tensor_search.search(config=self.config, index_name=self.index_name, text="what is the rider doing?", score_modifiers=None, result_count=10, searchable_attributes=["my_image_field"]) @@ -289,7 +292,7 @@ def test_search_score_modified_as_expected_with_searchable_attributes(self): assert len(modifier_res["hits"]) == len(documents) for result in modifier_res["hits"]: - expected_score = self.get_expected_score(result, normal_score, score_modifiers) + expected_score = get_expected_score(result, normal_score, score_modifiers) if abs(expected_score - result["_score"]) > epsilon: raise AssertionError @@ -299,10 +302,10 @@ def test_search_score_modified_as_expected_with_attributes_to_retrieve(self): del invalid_fields["_id"] add_docs_caller(config=self.config, index_name=self.index_name, docs=documents, - non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", - "filter"], auto_refresh=True) + non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", + "filter"], auto_refresh=True) normal_res = tensor_search.search(config=self.config, index_name=self.index_name, - text="what is the rider doing?", score_modifiers=None, result_count=10,) + text="what is the rider doing?", score_modifiers=None, result_count=10, ) normal_score = normal_res["hits"][0]["_score"] epsilon = 1e-5 @@ -319,7 +322,7 @@ def test_search_score_modified_as_expected_with_attributes_to_retrieve(self): assert field not in result # need to get the original doc with score modifier fields to compute expected score original_doc = [doc for doc in documents if doc["_id"] == result["_id"]][0] - expected_score = self.get_expected_score(original_doc, normal_score, score_modifiers) + expected_score = get_expected_score(original_doc, normal_score, score_modifiers) if abs(expected_score - result["_score"]) > epsilon: raise AssertionError @@ -377,8 +380,8 @@ def test_search_score_modified_as_expected_with_skipped_fields(self): ] add_docs_caller(config=self.config, index_name=self.index_name, docs=documents, - non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", - "filter"], auto_refresh=True) + non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", + "filter"], auto_refresh=True) normal_res = tensor_search.search(config=self.config, index_name=self.index_name, text="what is the rider doing?", score_modifiers=None, result_count=10) @@ -388,12 +391,12 @@ def test_search_score_modified_as_expected_with_skipped_fields(self): score_modifiers_list = self.test_valid_score_modifiers_list for score_modifiers in score_modifiers_list: modifier_res = tensor_search.search(config=self.config, index_name=self.index_name, - text = "what is the rider doing?", - score_modifiers=score_modifiers, result_count=10) + text="what is the rider doing?", + score_modifiers=score_modifiers, result_count=10) assert len(modifier_res["hits"]) == len(documents) for result in modifier_res["hits"]: - expected_score = self.get_expected_score(result, normal_score, score_modifiers) + expected_score = get_expected_score(result, normal_score, score_modifiers) if abs(expected_score - result["_score"]) > epsilon: raise AssertionError @@ -484,7 +487,7 @@ def test_expected_error_raised(self): }, { "field_name": "reputation-test", - },], + }, ], "add_to_score": [ {"field_name": "rate", }] @@ -503,7 +506,7 @@ def test_expected_error_raised(self): {"field_name": "rate", }] }, - { # empty + { # empty }, { # one part to be empty "multiply_score_by": [], @@ -518,9 +521,9 @@ def test_expected_error_raised(self): ] add_docs_caller(config=self.config, index_name=self.index_name, docs=documents, - non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", - "filter"], auto_refresh=True) - + non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", + "filter"], auto_refresh=True) + for invalid_score_modifiers in invalid_score_modifiers_list: try: v = ScoreModifier(**invalid_score_modifiers) @@ -537,21 +540,24 @@ def test_normal_query_body_is_called(self): }, ] add_docs_caller(config=self.config, index_name=self.index_name, docs=documents, - non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", - "filter"], auto_refresh=True) + non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", + "filter"], auto_refresh=True) + def pass_create_normal_tensor_search_query(*arg, **kwargs): return _create_normal_tensor_search_query(*arg, **kwargs) mock_create_normal_tensor_search_query = mock.MagicMock() mock_create_normal_tensor_search_query.side_effect = pass_create_normal_tensor_search_query - @mock.patch("marqo.tensor_search.tensor_search._create_normal_tensor_search_query", mock_create_normal_tensor_search_query) + @mock.patch("marqo.tensor_search.tensor_search._create_normal_tensor_search_query", + mock_create_normal_tensor_search_query) def run(): tensor_search.search(config=self.config, index_name=self.index_name, - text="what is the rider doing?", score_modifiers=None, result_count=10) + text="what is the rider doing?", score_modifiers=None, result_count=10) mock_create_normal_tensor_search_query.assert_called() return True + assert run() @@ -580,10 +586,10 @@ def setUp(self): # miss one weight "multiply_score_by": [{"field_name": "multiply_1", - "weight": 1,}, - {"field_name": "multiply_2",}], + "weight": 1, }, + {"field_name": "multiply_2", }], "add_to_score": [ - {"field_name": "add_1", "weight" : -3, + {"field_name": "add_1", "weight": -3, }, {"field_name": "add_2", "weight": 1, }] @@ -605,8 +611,8 @@ def setUp(self): # remove one field "multiply_score_by": [ - {"field_name": "multiply_2", - "weight": 1.2}], + {"field_name": "multiply_2", + "weight": 1.2}], "add_to_score": [ {"field_name": "add_1", "weight": -3, }, @@ -683,14 +689,14 @@ def setUp(self): # Any tests that call add_document, search, bulk_search need this env var self.device_patcher = mock.patch.dict(os.environ, {"MARQO_BEST_AVAILABLE_DEVICE": "cpu"}) self.device_patcher.start() - + def tearDown(self) -> None: try: tensor_search.delete_index(config=self.config, index_name=self.index_name) except: pass self.device_patcher.stop() - + def test_bulk_search_result_not_affected_if_fields_not_exist(self): documents = [ {"my_text_field": "A rider is riding a horse jumping over the barrier.", @@ -701,8 +707,8 @@ def test_bulk_search_result_not_affected_if_fields_not_exist(self): ] add_docs_caller(config=self.config, index_name=self.index_name, docs=documents, - non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", - "filter"], auto_refresh=True) + non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", + "filter"], auto_refresh=True) bulk_search_query = BulkSearchQuery( queries=[ @@ -713,7 +719,7 @@ def test_bulk_search_result_not_affected_if_fields_not_exist(self): ) ] ) - + normal_res = tensor_search.bulk_search(marqo_config=self.config, query=bulk_search_query) normal_score = normal_res["result"][0]["hits"][0]["_score"] @@ -725,14 +731,14 @@ def test_bulk_search_result_not_affected_if_fields_not_exist(self): scoreModifiers=ScoreModifier(**{ "multiply_score_by": [{"field_name": "multiply_1", - "weight": 1,}, - {"field_name": "multiply_2",} - ], + "weight": 1, }, + {"field_name": "multiply_2", } + ], "add_to_score": [ {"field_name": "add_1", - }, + }, {"field_name": "add_2", - } + } ] }) ) @@ -758,13 +764,13 @@ def get_expected_score(self, doc, ori_score, score_modifiers: ScoreModifier): add = add + config.weight * doc[config.field_name] return max(0.0, (ori_score + add)) - + def test_bulk_search_score_modified_as_expected(self): documents = self.test_score_documents add_docs_caller(config=self.config, index_name=self.index_name, docs=documents, - non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", - "filter"], auto_refresh=True) + non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", + "filter"], auto_refresh=True) bulk_search_query = BulkSearchQuery( queries=[ @@ -796,7 +802,7 @@ def test_bulk_search_score_modified_as_expected(self): assert len(modifier_res["result"][0]["hits"]) == len(documents) for result in modifier_res["result"][0]["hits"]: - expected_score = self.get_expected_score(result, normal_score, score_modifiers) + expected_score = get_expected_score(result, normal_score, score_modifiers) if abs(expected_score - result["_score"]) > epsilon: raise AssertionError @@ -804,8 +810,8 @@ def test_bulk_search_score_modified_as_expected_with_filter(self): documents = self.test_score_documents add_docs_caller(config=self.config, index_name=self.index_name, docs=documents, - non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", - "filter"], auto_refresh=True) + non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", + "filter"], auto_refresh=True) bulk_search_query = BulkSearchQuery( queries=[ @@ -839,16 +845,16 @@ def test_bulk_search_score_modified_as_expected_with_filter(self): assert len(modifier_res["result"][0]["hits"]) == 1 assert modifier_res["result"][0]["hits"][0]["_id"] == "0" for result in modifier_res["result"][0]["hits"]: - expected_score = self.get_expected_score(result, normal_score, score_modifiers) + expected_score = get_expected_score(result, normal_score, score_modifiers) if abs(expected_score - result["_score"]) > epsilon: raise AssertionError - + def test_bulk_search_score_modified_as_expected_with_searchable_attributes(self): documents = self.test_score_documents add_docs_caller(config=self.config, index_name=self.index_name, docs=documents, - non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", - "filter"], auto_refresh=True) + non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", + "filter"], auto_refresh=True) normal_res = tensor_search.search(config=self.config, index_name=self.index_name, text="what is the rider doing?", score_modifiers=None, result_count=10, searchable_attributes=["my_image_field"]) @@ -863,7 +869,7 @@ def test_bulk_search_score_modified_as_expected_with_searchable_attributes(self) ) ] ) - )["result"][0] + )["result"][0] normal_score = normal_res["hits"][0]["_score"] epsilon = 1e-5 @@ -883,7 +889,7 @@ def test_bulk_search_score_modified_as_expected_with_searchable_attributes(self) assert len(modifier_res["hits"]) == len(documents) for result in modifier_res["hits"]: - expected_score = self.get_expected_score(result, normal_score, score_modifiers) + expected_score = get_expected_score(result, normal_score, score_modifiers) if abs(expected_score - result["_score"]) > epsilon: raise AssertionError @@ -893,10 +899,10 @@ def test_bulk_search_score_modified_as_expected_with_attributes_to_retrieve(self del invalid_fields["_id"] add_docs_caller(config=self.config, index_name=self.index_name, docs=documents, - non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", - "filter"], auto_refresh=True) + non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", + "filter"], auto_refresh=True) normal_res = tensor_search.search(config=self.config, index_name=self.index_name, - text="what is the rider doing?", score_modifiers=None, result_count=10,) + text="what is the rider doing?", score_modifiers=None, result_count=10, ) normal_score = normal_res["hits"][0]["_score"] epsilon = 1e-5 @@ -921,7 +927,7 @@ def test_bulk_search_score_modified_as_expected_with_attributes_to_retrieve(self assert field not in result # need to get the original doc with score modifier fields to compute expected score original_doc = [doc for doc in documents if doc["_id"] == result["_id"]][0] - expected_score = self.get_expected_score(original_doc, normal_score, score_modifiers) + expected_score = get_expected_score(original_doc, normal_score, score_modifiers) if abs(expected_score - result["_score"]) > epsilon: raise AssertionError @@ -979,8 +985,8 @@ def test_bulk_search_score_modified_as_expected_with_skipped_fields(self): ] add_docs_caller(config=self.config, index_name=self.index_name, docs=documents, - non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", - "filter"], auto_refresh=True) + non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", + "filter"], auto_refresh=True) normal_res = tensor_search.bulk_search(marqo_config=self.config, query=BulkSearchQuery( queries=[ @@ -1010,7 +1016,7 @@ def test_bulk_search_score_modified_as_expected_with_skipped_fields(self): assert len(modifier_res["hits"]) == len(documents) for result in modifier_res["hits"]: - expected_score = self.get_expected_score(result, normal_score, score_modifiers) + expected_score = get_expected_score(result, normal_score, score_modifiers) if abs(expected_score - result["_score"]) > epsilon: raise AssertionError @@ -1023,15 +1029,17 @@ def test_bulk_search_normal_query_body_is_called(self): }, ] add_docs_caller(config=self.config, index_name=self.index_name, docs=documents, - non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", - "filter"], auto_refresh=True) + non_tensor_fields=["multiply_1", "multiply_2", "add_1", "add_2", + "filter"], auto_refresh=True) + def pass_create_normal_tensor_search_query(*arg, **kwargs): return _create_normal_tensor_search_query(*arg, **kwargs) mock_create_normal_tensor_search_query = mock.MagicMock() mock_create_normal_tensor_search_query.side_effect = pass_create_normal_tensor_search_query - @mock.patch("marqo.tensor_search.tensor_search._create_normal_tensor_search_query", mock_create_normal_tensor_search_query) + @mock.patch("marqo.tensor_search.tensor_search._create_normal_tensor_search_query", + mock_create_normal_tensor_search_query) def run(): tensor_search.bulk_search(marqo_config=self.config, query=BulkSearchQuery( queries=[ @@ -1046,6 +1054,7 @@ def run(): mock_create_normal_tensor_search_query.assert_called() return True + assert run() def test_score_modifier_vector_text_search_verbose(self): @@ -1057,8 +1066,11 @@ def test_score_modifier_vector_text_search_verbose(self): mock_generate_verbose_one_body = mock.MagicMock() mock_generate_verbose_one_body.side_effect = _generate_vector_text_search_query_for_verbose_one - @mock.patch('marqo.tensor_search.tensor_search._generate_vector_text_search_query_for_verbose_one', mock_generate_verbose_one_body) - @mock.patch('marqo.tensor_search.tensor_search._vector_text_search_query_verbose', mock_vector_text_search_verbose) + + @mock.patch('marqo.tensor_search.tensor_search._generate_vector_text_search_query_for_verbose_one', + mock_generate_verbose_one_body) + @mock.patch('marqo.tensor_search.tensor_search._vector_text_search_query_verbose', + mock_vector_text_search_verbose) @mock.patch('marqo.tensor_search.tensor_search.pprint.pprint', mock_pprint) def run(): for verbose in [0, 1, 2]: @@ -1086,7 +1098,8 @@ def run(): assert verbose_kwargs["verbose"] == verbose assert verbose_kwargs["body"][0]["index"] == self.index_name assert "knn" in \ - verbose_kwargs["body"][1]["query"]["function_score"]["query"]["nested"]["query"]["function_score"][ + verbose_kwargs["body"][1]["query"]["function_score"]["query"]["nested"]["query"][ + "function_score"][ "query"] assert mock_pprint.call_count == 2 @@ -1101,6 +1114,7 @@ def run(): mock_vector_text_search_verbose.reset_mock() mock_pprint.reset_mock() return True + assert run() def test_score_modifier_with_changing_weights(self): @@ -1122,12 +1136,14 @@ def test_score_modifier_with_changing_weights(self): # By default, Opensearch only allow 75 recompiling in 5 minutes. for _ in range(300): res = tensor_search.search(config=self.config, text="random text", score_modifiers=ScoreModifier(**{ - "multiply_score_by": [{"field_name": f"multiply_{np.random.randint(1, 100)}", "weight": np.random.rand()}, - {"field_name": f"multiply_{np.random.randint(1, 100)}", "weight": np.random.rand()}], + "multiply_score_by": [ + {"field_name": f"multiply_{np.random.randint(1, 100)}", "weight": np.random.rand()}, + {"field_name": f"multiply_{np.random.randint(1, 100)}", "weight": np.random.rand()}], "add_to_score": [{"field_name": f"add_{np.random.randint(1, 100)}", "weight": np.random.rand()}, {"field_name": f"add_{np.random.randint(1, 100)}", "weight": np.random.rand()}] }), index_name=self.index_name) + @pytest.mark.slow def test_too_many_dynamic_script_compilation_error(self): """To test the dynamic script compilation errors in Opensearch. The error is expected to be raised when there are too many dynamic script compilation in a short time.""" @@ -1158,34 +1174,44 @@ def test_too_many_dynamic_script_compilation_error(self): except BackendCommunicationError as e: assert "Too many dynamic script compilations" in str(e.message) break + # wait for 5 minutes to reset the dynamic script compilation limit + time.sleep(300) def test_score_modifiers_script(self): """Ensure that the script generated by score modifiers is correct.""" mock_generate_script_score = mock.MagicMock() generated_script_score = [] + def record_script_score_output(*args, **kwargs): generated_script_score.append(_create_score_modifiers_tensor_search_query(*args, **kwargs)) return generated_script_score[-1] + mock_generate_script_score.side_effect = record_script_score_output - @mock.patch("marqo.tensor_search.tensor_search._create_score_modifiers_tensor_search_query", mock_generate_script_score) + + @mock.patch("marqo.tensor_search.tensor_search._create_score_modifiers_tensor_search_query", + mock_generate_script_score) def run(): first_call = tensor_search.search(config=self.config, text="random text", score_modifiers=ScoreModifier(**{ - "multiply_score_by": [{"field_name": f"multiply_{np.random.randint(1, 100)}", "weight": np.random.rand()}, - {"field_name": f"multiply_{np.random.randint(1, 100)}", "weight": np.random.rand()}], + "multiply_score_by": [ + {"field_name": f"multiply_{np.random.randint(1, 100)}", "weight": np.random.rand()}, + {"field_name": f"multiply_{np.random.randint(1, 100)}", "weight": np.random.rand()}], "add_to_score": [{"field_name": f"add_{np.random.randint(1, 100)}", "weight": np.random.rand()}, {"field_name": f"add_{np.random.randint(1, 100)}", "weight": np.random.rand()}] }), index_name=self.index_name) second_call = tensor_search.search(config=self.config, text="random text", score_modifiers=ScoreModifier(**{ - "multiply_score_by": [{"field_name": f"multiply_{np.random.randint(1, 100)}", "weight": np.random.rand()}, - {"field_name": f"multiply_{np.random.randint(1, 100)}", "weight": np.random.rand()}], + "multiply_score_by": [ + {"field_name": f"multiply_{np.random.randint(1, 100)}", "weight": np.random.rand()}, + {"field_name": f"multiply_{np.random.randint(1, 100)}", "weight": np.random.rand()}], "add_to_score": [{"field_name": f"add_{np.random.randint(1, 100)}", "weight": np.random.rand()}, {"field_name": f"add_{np.random.randint(1, 100)}", "weight": np.random.rand()}] }), index_name=self.index_name) - assert generated_script_score[0]["query"]["function_score"]["query"]["nested"]["query"]["function_score"]["functions"][0]["script_score"]["script"]["source"] == \ - generated_script_score[1]["query"]["function_score"]["query"]["nested"]["query"]["function_score"]["functions"][0]["script_score"]["script"]["source"] + assert generated_script_score[0]["query"]["function_score"]["query"]["nested"]["query"]["function_score"][ + "functions"][0]["script_score"]["script"]["source"] == \ + generated_script_score[1]["query"]["function_score"]["query"]["nested"]["query"]["function_score"][ + "functions"][0]["script_score"]["script"]["source"] return True assert run() @@ -1288,4 +1314,81 @@ def test_zero_weight_multiply_score_by_script_score(self): def test_negative_weight_add_to_score_script_score(self): modifier = ScoreModifier(add_to_score=[ScoreModifierOperator(field_name="test2", weight=-3.0)]) result = modifier.to_script_score() - self.assertEqual(result["params"]["add_weight_0"], -3.0) \ No newline at end of file + self.assertEqual(result["params"]["add_weight_0"], -3.0) + + +class TestScoreModifierSearchScore(MarqoTestCase): + """This test ensures that scoreModifier modifies the score as expected. + As this test requires different setups, we put it in a separate class.""" + + def setUp(self): + self.index_name = "test_score_modifier_search_score" + self.test_score_documents = { + "my_text_field": "A rider is riding a horse jumping over the barrier.", + "_id": "test_1" + } + self.query = "what is the rider doing?" + for i in range(50): + # We are not using a random seed here as this test should pass for all randomness + self.test_score_documents[f"multiply_{i}"] = float((np.random.rand() - 0.5) * 10) # [-5, 5) + self.test_score_documents[f"add_{i}"] = float((np.random.rand() - 0.5) * 10) + + try: + tensor_search.delete_index(self.config, self.index_name) + except IndexNotFoundError: + pass + + self.device_patcher = mock.patch.dict(os.environ, {"MARQO_BEST_AVAILABLE_DEVICE": "cpu"}) + self.device_patcher.start() + + tensor_search.create_vector_index(config=self.config, index_name=self.index_name) + tensor_search.add_documents(config=self.config, add_docs_params=AddDocsParams( + index_name=self.index_name, + docs=[self.test_score_documents], + tensor_fields = ["my_text_field"], + non_tensor_fields = None, + auto_refresh=True, + device="cpu" + )) + + res = tensor_search.search(config=self.config, index_name=self.index_name, text = self.query)["hits"] + assert res[0]["_id"] == "test_1" + + self.original_score = res[0]["_score"] + + def tearDown(self) -> None: + try: + tensor_search.delete_index(config=self.config, index_name=self.index_name) + except: + pass + self.device_patcher.stop() + + def test_score_modifiers_search_score(self): + """We select 10, 15, 20, 25 random multiply fields and add fields to verify the score modifiers. + Number of fields should change too much to avoid excessive compilation error in Opensearch.""" + for number_of_fields in [10, 15, 20, 25]: + number_of_tests = 20 + for _ in range(number_of_tests): + + score_modifiers = { + "multiply_score_by": [], + "add_to_score": [] + } + + multiply_indices = np.random.choice(50, number_of_fields, replace=False) + add_indices = np.random.choice(50, number_of_fields, replace=False) + + for i, j in zip(multiply_indices, add_indices): + score_modifiers["multiply_score_by"].append({"field_name": f"multiply_{i}", "weight": np.random.rand()}) + score_modifiers["add_to_score"].append({"field_name": f"add_{j}", "weight": np.random.rand()}) + + res = tensor_search.search(config=self.config, index_name=self.index_name, text=self.query, + score_modifiers=ScoreModifier(**score_modifiers))["hits"] + + returned_score = res[0]["_score"] + true_score = get_expected_score(self.test_score_documents, self.original_score, ScoreModifier(**score_modifiers)) + + assert np.allclose(returned_score, true_score, atol=1e-5) + + + diff --git a/tox.ini b/tox.ini index f0fcd7f55..d2fb47835 100644 --- a/tox.ini +++ b/tox.ini @@ -8,7 +8,7 @@ whitelist_externals = python deps = pytest - -rrequirements.txt + -r requirements.dev.txt setenv = PYTHONPATH = {toxinidir}{/}src:{toxinidir} commands =