Fix embed not using cuda as default device when available 2.10 (#942)

What is the current behavior? (You can also link to an open issue here) When device is unset in the embed request, the default device of embed is cpu, even though cuda is available. Other functionalities, such as search and add docs, are unaffected (they default to cuda when available). What is the new behavior (if this is a feature change)? The default device in embed will be cuda when it is available.
marqo-ai · Aug 20, 2024 · fecb827 · fecb827
1 parent 465cc8e
commit fecb827
Show file tree

Hide file tree

Showing 5 changed files with 59 additions and 3 deletions.
diff --git a/.gitignore b/.gitignore
@@ -149,4 +149,6 @@ dump.rdb
 .DS_Store
 
 # Tester app for unit tests
-scripts/vespa_local/vespa_tester_app.zip
+scripts/vespa_local/vespa_tester_app.zip
+
+src/marqo/tensor_search/cache_dir/*
diff --git a/RELEASE.md b/RELEASE.md
@@ -1,3 +1,16 @@
+# Release 2.10.2
+
+ ## Bug fixes and minor changes
+ - Fix an issue where CUDA was not automatically selected as the default device for the `embed` endpoint, even when available [#941](https://github.com/marqo-ai/marqo/pull/941).
+
+# Release 2.10.1
+
+## Bug fixes and minor changes
+- Improve the clarity of the error message when Marqo can not download the provided image ([#905](https://github.com/marqo-ai/marqo/pull/905)).
+- Improve the error message in hybrid search to avoid confusion ([#900](https://github.com/marqo-ai/marqo/pull/900)).
+- Fix a bug where a `500` error is returned when an unsupported search method is provided. Marqo now correctly returns a `400` error ([#899](https://github.com/marqo-ai/marqo/pull/899)).
+- Fix a bug where a `500` error is returned when an invalid image URL with non-ASCII characters is provided. Marqo now encodes the image URL correctly ([#908](https://github.com/marqo-ai/marqo/pull/908)).
+
 # Release 2.10.0
 
 ## New features

diff --git a/src/marqo/core/embed/embed.py b/src/marqo/core/embed/embed.py
@@ -13,6 +13,7 @@
 from marqo.tensor_search.tensor_search_logging import get_logger
 from marqo.core.utils.prefix import determine_text_prefix, DeterminePrefixContentType
 from marqo.vespa.vespa_client import VespaClient
+from marqo.tensor_search import utils
 
 logger = get_logger(__name__)
 
@@ -64,7 +65,7 @@ def embed_content(
 
  # Set default device if not provided
  if device is None:
- device = self.default_device
+ device = utils.read_env_vars_and_defaults("MARQO_BEST_AVAILABLE_DEVICE")
 
 
  # Content validation is done in API model layer

diff --git a/src/marqo/version.py b/src/marqo/version.py
@@ -1,4 +1,4 @@
-__version__ = "2.10.1"
+__version__ = "2.10.2"
 
 def get_version() -> str:
  return f"{__version__}"
diff --git a/tests/tensor_search/integ_tests/test_embed.py b/tests/tensor_search/integ_tests/test_embed.py
@@ -20,6 +20,7 @@
 from marqo.vespa.models.query_result import Root, Child, RootFields
 from marqo.tensor_search.models.private_models import S3Auth, ModelAuth, HfAuth
 from marqo.api.models.embed_request import EmbedRequest
+from marqo.tensor_search import utils
 import os
 import pprint
 import unittest
@@ -150,6 +151,45 @@ def tearDown(self) -> None:
  super().tearDown()
  self.device_patcher.stop()
 
+ def test_embed_content_cuda_device_as_default(self):
+ """
+ Test that embed_content uses the default device when no device is specified.
+ """
+ for index in [self.unstructured_default_text_index, self.structured_default_text_index]:
+ with self.subTest(index=index.type):
+ expected_devices = ["cuda", "cpu"]
+ for expected_device in expected_devices:
+ with patch.dict(os.environ, {"MARQO_BEST_AVAILABLE_DEVICE": expected_device}):
+ with patch('marqo.tensor_search.tensor_search.run_vectorise_pipeline') as mock_vectorise:
+ mock_vectorise.return_value = {0: [0.1, 0.2, 0.3]}
+
+ embed_res = embed(
+ marqo_config=self.config,
+ index_name=index.name,
+ embedding_request=EmbedRequest(
+ content=["This is a test document"]
+ ),
+ device=None
+ )
+
+ # Check that run_vectorise_pipeline was called
+ mock_vectorise.assert_called_once()
+
+ # Get the arguments passed to run_vectorise_pipeline
+ args, kwargs = mock_vectorise.call_args
+
+ # Print the args and kwargs for debugging
+ print(f"args passed to run_vectorise_pipeline: {args}")
+ print(f"kwargs passed to run_vectorise_pipeline: {kwargs}")
+
+ # Check that the device passed to run_vectorise_pipeline matches the expected value
+ self.assertEqual(args[2], expected_device)
+
+ # Check the result
+ self.assertEqual(embed_res["content"], ["This is a test document"])
+ self.assertIsInstance(embed_res["embeddings"][0], list)
+ self.assertEqual(embed_res["embeddings"][0], [0.1, 0.2, 0.3])
+
  def test_embed_equivalent_to_add_docs(self):
  """
  Ensure that the embedding returned by embed endpoint matches the one created by add_docs.