diff --git a/.gitignore b/.gitignore index c6b115c56..b1d549014 100644 --- a/.gitignore +++ b/.gitignore @@ -149,4 +149,6 @@ dump.rdb .DS_Store # Tester app for unit tests -scripts/vespa_local/vespa_tester_app.zip \ No newline at end of file +scripts/vespa_local/vespa_tester_app.zip + +src/marqo/tensor_search/cache_dir/* \ No newline at end of file diff --git a/RELEASE.md b/RELEASE.md index 1fb75a5f2..3549ae920 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,16 @@ +# Release 2.10.2 + +## Bug fixes and minor changes +- Fix an issue where CUDA was not automatically selected as the default device for the `embed` endpoint, even when available ([#941](https://github.com/marqo-ai/marqo/pull/941)). + +# Release 2.10.1 + +## Bug fixes and minor changes +- Improve the clarity of the error message when Marqo cannot download the provided image ([#905](https://github.com/marqo-ai/marqo/pull/905)). +- Improve the error message in hybrid search to avoid confusion ([#900](https://github.com/marqo-ai/marqo/pull/900)). +- Fix a bug where a `500` error is returned when an unsupported search method is provided. Marqo now correctly returns a `400` error ([#899](https://github.com/marqo-ai/marqo/pull/899)). +- Fix a bug where a `500` error is returned when an invalid image URL with non-ASCII characters is provided. Marqo now encodes the image URL correctly ([#908](https://github.com/marqo-ai/marqo/pull/908)). 
+ # Release 2.10.0 ## New features diff --git a/src/marqo/core/embed/embed.py b/src/marqo/core/embed/embed.py index d8c00815c..fa8430a49 100644 --- a/src/marqo/core/embed/embed.py +++ b/src/marqo/core/embed/embed.py @@ -13,6 +13,7 @@ from marqo.tensor_search.tensor_search_logging import get_logger from marqo.core.utils.prefix import determine_text_prefix, DeterminePrefixContentType from marqo.vespa.vespa_client import VespaClient +from marqo.tensor_search import utils logger = get_logger(__name__) @@ -64,7 +65,7 @@ def embed_content( # Set default device if not provided if device is None: - device = self.default_device + device = utils.read_env_vars_and_defaults("MARQO_BEST_AVAILABLE_DEVICE") # Content validation is done in API model layer diff --git a/src/marqo/version.py b/src/marqo/version.py index 23ca83757..0cbbb5e0c 100644 --- a/src/marqo/version.py +++ b/src/marqo/version.py @@ -1,4 +1,4 @@ -__version__ = "2.10.1" +__version__ = "2.10.2" def get_version() -> str: return f"{__version__}" diff --git a/tests/tensor_search/integ_tests/test_embed.py b/tests/tensor_search/integ_tests/test_embed.py index 38e4e3527..eb07fd068 100644 --- a/tests/tensor_search/integ_tests/test_embed.py +++ b/tests/tensor_search/integ_tests/test_embed.py @@ -20,6 +20,7 @@ from marqo.vespa.models.query_result import Root, Child, RootFields from marqo.tensor_search.models.private_models import S3Auth, ModelAuth, HfAuth from marqo.api.models.embed_request import EmbedRequest +from marqo.tensor_search import utils import os import pprint import unittest @@ -150,6 +151,45 @@ def tearDown(self) -> None: super().tearDown() self.device_patcher.stop() + def test_embed_content_cuda_device_as_default(self): + """ + Test that embed_content uses the default device when no device is specified. 
+ """ + for index in [self.unstructured_default_text_index, self.structured_default_text_index]: + with self.subTest(index=index.type): + expected_devices = ["cuda", "cpu"] + for expected_device in expected_devices: + with patch.dict(os.environ, {"MARQO_BEST_AVAILABLE_DEVICE": expected_device}): + with patch('marqo.tensor_search.tensor_search.run_vectorise_pipeline') as mock_vectorise: + mock_vectorise.return_value = {0: [0.1, 0.2, 0.3]} + + embed_res = embed( + marqo_config=self.config, + index_name=index.name, + embedding_request=EmbedRequest( + content=["This is a test document"] + ), + device=None + ) + + # Check that run_vectorise_pipeline was called + mock_vectorise.assert_called_once() + + # Get the arguments passed to run_vectorise_pipeline + args, kwargs = mock_vectorise.call_args + + # Print the args and kwargs for debugging + print(f"args passed to run_vectorise_pipeline: {args}") + print(f"kwargs passed to run_vectorise_pipeline: {kwargs}") + + # Check that the device passed to run_vectorise_pipeline matches the expected value + self.assertEqual(args[2], expected_device) + + # Check the result + self.assertEqual(embed_res["content"], ["This is a test document"]) + self.assertIsInstance(embed_res["embeddings"][0], list) + self.assertEqual(embed_res["embeddings"][0], [0.1, 0.2, 0.3]) + def test_embed_equivalent_to_add_docs(self): """ Ensure that the embedding returned by embed endpoint matches the one created by add_docs.