From 306c60ea91c2831b471d60315bd24b153b3e6660 Mon Sep 17 00:00:00 2001 From: Sam Lijin Date: Fri, 18 Oct 2024 18:25:14 -0700 Subject: [PATCH 1/7] allow serializing and deserializing image/audio via pydantic --- docs/docs/snippets/supported-types.mdx | 49 ++++++++ engine/baml-lib/baml-types/src/media.rs | 8 +- engine/language_client_python/Cargo.toml | 4 + engine/language_client_python/pyproject.toml | 5 + engine/language_client_python/src/errors.rs | 6 +- .../language_client_python/src/types/audio.rs | 35 +++--- .../language_client_python/src/types/image.rs | 37 +++--- .../src/types/media_repr.rs | 115 ++++++++++++++++++ .../language_client_python/src/types/mod.rs | 1 + 9 files changed, 218 insertions(+), 42 deletions(-) create mode 100644 engine/language_client_python/src/types/media_repr.rs diff --git a/docs/docs/snippets/supported-types.mdx b/docs/docs/snippets/supported-types.mdx index 18b2a2527..c376fcee5 100644 --- a/docs/docs/snippets/supported-types.mdx +++ b/docs/docs/snippets/supported-types.mdx @@ -40,6 +40,25 @@ See [Union(|)](#union-) for more details. See [calling a function with multimodal types](/docs/snippets/calling-baml/multi-modal) and [testing image inputs](/docs/snippets/test-cases#images) + + BAML's multimodal types are designed for ease of use: we have deliberately made it + easy for you to construct an `image` or `audio` instance from a URL. Under the + hood, depending on the model you're using, BAML may need to download the image + and transcode it (usually as base64) for the model to consume. + + This ease-of-use does come with some tradeoffs; namely, if you construct + an `image` or `audio` instance using untrusted user input, you may be exposing + yourself to [server-side request forgery (SSRF) attacks][ssrf]. Attackers may be + able to fetch files on your internal network, on external networks using your + application's identity, or simply excessively drive up your cloud network + bandwidth bill. 
+ + To prevent this, we recommend only using URLs from trusted sources/users or + validating them using allowlists or denylists. + +[ssrf]: https://portswigger.net/web-security/ssrf + + ### `image` You can use an image like this for models that support them: @@ -114,6 +133,36 @@ end ``` + +If using Pydantic, the following are valid ways to construct the `Image` type. + +```json +{ + "url": "https://upload.wikimedia.org/wikipedia/en/4/4d/Shrek_%28character%29.png" +} +``` + +```json +{ + "url": "https://upload.wikimedia.org/wikipedia/en/4/4d/Shrek_%28character%29.png", + "mime_type": "image/png" +} +``` + +```json +{ + "base64": "iVBORw0K...." +} +``` + +```json +{ + "base64": "iVBORw0K....", + "mime_type": "image/png" +} +``` + + ### `audio` Example diff --git a/engine/baml-lib/baml-types/src/media.rs b/engine/baml-lib/baml-types/src/media.rs index 3f0cc3c81..d81441e51 100644 --- a/engine/baml-lib/baml-types/src/media.rs +++ b/engine/baml-lib/baml-types/src/media.rs @@ -18,8 +18,12 @@ impl fmt::Display for BamlMediaType { } } -// We rely on the serialization and deserialization of this struct for: -// - prompt rendering (going into minijinja rendering and coming out) +/// We rely on the serialization and deserialization of this struct for: +/// +/// - prompt rendering (going into minijinja rendering and coming out) +/// i.e. 
when we render a prompt, minijinja operates on a string; that +/// string needs to encode BamlMedia instances, and this is how we do +/// that #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct BamlMedia { pub media_type: BamlMediaType, diff --git a/engine/language_client_python/Cargo.toml b/engine/language_client_python/Cargo.toml index 710b3216b..92c6f3f26 100644 --- a/engine/language_client_python/Cargo.toml +++ b/engine/language_client_python/Cargo.toml @@ -10,6 +10,10 @@ license = "Apache-2.0" name = "baml_py" crate-type = ["cdylib"] +[lints.rust] +dead_code = "deny" +unused_imports = "deny" + [dependencies] anyhow.workspace = true baml-types.workspace = true diff --git a/engine/language_client_python/pyproject.toml b/engine/language_client_python/pyproject.toml index b89f7fd93..e93d8f4c8 100644 --- a/engine/language_client_python/pyproject.toml +++ b/engine/language_client_python/pyproject.toml @@ -17,3 +17,8 @@ features = ["pyo3/extension-module"] [project.scripts] baml-cli = "baml_py:invoke_runtime_cli" + +# NOTE: dependencies is _deliberately_ empty so that different flavors of BAML +# don't have to share dependencies, e.g. we currently generate python/pydantic +# but if we generate python/vanilla in the future, we don't want to mess +# with that. 
diff --git a/engine/language_client_python/src/errors.rs b/engine/language_client_python/src/errors.rs index 52336e9e4..96d528436 100644 --- a/engine/language_client_python/src/errors.rs +++ b/engine/language_client_python/src/errors.rs @@ -1,12 +1,8 @@ use baml_runtime::{ errors::ExposedError, internal::llm_client::LLMResponse, scope_diagnostics::ScopeStack, }; -use pyo3::prelude::pyclass; use pyo3::types::PyModule; -use pyo3::{ - create_exception, py_run, pyfunction, pymodule, wrap_pyfunction, wrap_pymodule, Bound, PyClass, - PyErr, PyResult, Python, -}; +use pyo3::{create_exception, pymodule, Bound, PyErr, PyResult, Python}; create_exception!(baml_py, BamlError, pyo3::exceptions::PyException); // Existing exception definitions diff --git a/engine/language_client_python/src/types/audio.rs b/engine/language_client_python/src/types/audio.rs index bf25b02fb..e9c584cb5 100644 --- a/engine/language_client_python/src/types/audio.rs +++ b/engine/language_client_python/src/types/audio.rs @@ -1,9 +1,12 @@ use baml_types::BamlMediaContent; -use pyo3::prelude::{pymethods, PyAnyMethods, PyModule, PyResult}; +use pyo3::prelude::{pymethods, PyResult}; use pyo3::types::PyType; -use pyo3::{Bound, Py, PyAny, PyObject, Python, ToPyObject}; +use pyo3::{Bound, PyAny, PyObject, Python}; +use pythonize::{depythonize_bound, pythonize}; use crate::errors::BamlError; + +use super::media_repr::{self, UserFacingBamlMedia}; crate::lang_wrapper!(BamlAudioPy, baml_types::BamlMedia); #[pymethods] @@ -63,31 +66,29 @@ impl BamlAudioPy { } } - // Makes it work with Pydantic #[classmethod] pub fn __get_pydantic_core_schema__( _cls: Bound<'_, PyType>, _source_type: Bound<'_, PyAny>, _handler: Bound<'_, PyAny>, ) -> PyResult { - Python::with_gil(|py| { - let code = r#" -from pydantic_core import core_schema - -def get_schema(): - # No validation - return core_schema.any_schema() + media_repr::__get_pydantic_core_schema__(_cls, _source_type, _handler) + } -ret = get_schema() - "#; - // py.run(code, 
None, Some(ret_dict)); - let fun: Py = PyModule::from_code_bound(py, code, "", "")? - .getattr("ret")? - .into(); - Ok(fun.to_object(py)) // Return the PyObject + #[staticmethod] + fn baml_deserialize(data: PyObject, py: Python<'_>) -> PyResult { + let data: UserFacingBamlMedia = depythonize_bound(data.into_bound(py))?; + Ok(BamlAudioPy { + inner: data.to_baml_media(baml_types::BamlMediaType::Audio), }) } + pub fn baml_serialize(&self, py: Python<'_>) -> PyResult { + let s: UserFacingBamlMedia = (&self.inner).try_into().map_err(BamlError::from_anyhow)?; + let s = serde_json::to_value(&s).map_err(|e| BamlError::from_anyhow(e.into()))?; + Ok(pythonize(py, &s)?) + } + pub fn __eq__(&self, other: &Self) -> bool { self.inner == other.inner } diff --git a/engine/language_client_python/src/types/image.rs b/engine/language_client_python/src/types/image.rs index b98e5fe7a..a4523da24 100644 --- a/engine/language_client_python/src/types/image.rs +++ b/engine/language_client_python/src/types/image.rs @@ -1,8 +1,11 @@ -use pyo3::prelude::{pymethods, PyAnyMethods, PyModule, PyResult}; +use pyo3::prelude::{pymethods, PyResult}; use pyo3::types::PyType; -use pyo3::{Bound, Py, PyAny, PyObject, Python, ToPyObject}; +use pyo3::{Bound, PyAny, PyObject, Python}; +use pythonize::{depythonize_bound, pythonize}; -use crate::errors::BamlInvalidArgumentError; +use crate::errors::{BamlError, BamlInvalidArgumentError}; + +use super::media_repr::{self, UserFacingBamlMedia}; crate::lang_wrapper!(BamlImagePy, baml_types::BamlMedia); #[pymethods] @@ -62,31 +65,29 @@ impl BamlImagePy { } } - // Makes it work with Pydantic #[classmethod] pub fn __get_pydantic_core_schema__( _cls: Bound<'_, PyType>, _source_type: Bound<'_, PyAny>, _handler: Bound<'_, PyAny>, ) -> PyResult { - Python::with_gil(|py| { - let code = r#" -from pydantic_core import core_schema - -def get_schema(): - # No validation - return core_schema.any_schema() + media_repr::__get_pydantic_core_schema__(_cls, _source_type, _handler) + 
} -ret = get_schema() - "#; - // py.run(code, None, Some(ret_dict)); - let fun: Py = PyModule::from_code_bound(py, code, "", "")? - .getattr("ret")? - .into(); - Ok(fun.to_object(py)) // Return the PyObject + #[staticmethod] + fn baml_deserialize(data: PyObject, py: Python<'_>) -> PyResult { + let data: UserFacingBamlMedia = depythonize_bound(data.into_bound(py))?; + Ok(BamlImagePy { + inner: data.to_baml_media(baml_types::BamlMediaType::Image), }) } + pub fn baml_serialize(&self, py: Python<'_>) -> PyResult { + let s: UserFacingBamlMedia = (&self.inner).try_into().map_err(BamlError::from_anyhow)?; + let s = serde_json::to_value(&s).map_err(|e| BamlError::from_anyhow(e.into()))?; + Ok(pythonize(py, &s)?) + } + pub fn __eq__(&self, other: &Self) -> bool { self.inner == other.inner } diff --git a/engine/language_client_python/src/types/media_repr.rs b/engine/language_client_python/src/types/media_repr.rs new file mode 100644 index 000000000..0855ec245 --- /dev/null +++ b/engine/language_client_python/src/types/media_repr.rs @@ -0,0 +1,115 @@ +use anyhow::Result; +use baml_types::{BamlMedia, BamlMediaContent, BamlMediaType, MediaBase64, MediaUrl}; +use pyo3::{ + types::{PyAnyMethods, PyModule, PyType}, + Bound, Py, PyAny, PyObject, PyResult, Python, ToPyObject, +}; +use serde::{Deserialize, Serialize}; + +/// We rely on the serialization and deserialization of this struct for: +/// +/// - pydantic serialization (JSON->FastAPI->Pydantic->baml_py), so that +/// users can include BAML types directly in their user-facing requests +#[derive(Debug, Serialize, Deserialize)] +pub struct UserFacingBamlMedia { + #[serde(skip_serializing_if = "Option::is_none")] + pub mime_type: Option, + #[serde(flatten)] + pub content: UserFacingBamlMediaContent, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(untagged)] +pub enum UserFacingBamlMediaContent { + Url { url: String }, + Base64 { base64: String }, +} + +impl UserFacingBamlMedia { + pub fn to_baml_media(self, media_type: 
BamlMediaType) -> BamlMedia { + BamlMedia { + media_type, + mime_type: self.mime_type, + content: match self.content { + UserFacingBamlMediaContent::Url { url } => BamlMediaContent::Url(MediaUrl { url }), + UserFacingBamlMediaContent::Base64 { base64 } => { + BamlMediaContent::Base64(MediaBase64 { base64 }) + } + }, + } + } +} + +impl TryInto for &BamlMedia { + type Error = anyhow::Error; + + fn try_into(self) -> Result { + Ok(UserFacingBamlMedia { + mime_type: self.mime_type.clone(), + content: match &self.content { + BamlMediaContent::Url(url) => UserFacingBamlMediaContent::Url { + url: url.url.clone(), + }, + BamlMediaContent::Base64(base64) => UserFacingBamlMediaContent::Base64 { + base64: base64.base64.clone(), + }, + BamlMediaContent::File(_) => { + anyhow::bail!("Cannot convert file media to user facing media") + } + }, + }) + } +} + +/// This function is used for Pydantic compatibility in three ways: +/// +/// - allows constructing Pydantic models containing a BamlImagePy instance +/// - allows FastAPI requests to deserialize BamlImagePy instances in JSON format +/// - allows serializing BamlImagePy instances in JSON format +pub fn __get_pydantic_core_schema__( + _cls: Bound<'_, PyType>, + _source_type: Bound<'_, PyAny>, + _handler: Bound<'_, PyAny>, +) -> PyResult { + Python::with_gil(|py| { + let code = r#" +from pydantic_core import core_schema, SchemaValidator + +def deserialize(data): + from baml_py.baml_py import BamlImagePy + if isinstance(data, BamlImagePy): + return data + else: + SchemaValidator( + core_schema.union_schema([ + core_schema.dataclass_args_schema('baml.Image', [ + core_schema.dataclass_field(name='url', schema=core_schema.str_schema()), + core_schema.dataclass_field(name='mime_type', schema=core_schema.with_default_schema(core_schema.str_schema(), default='')), + ]), + core_schema.dataclass_args_schema('baml.Image', [ + core_schema.dataclass_field(name='base64', schema=core_schema.str_schema()), + 
core_schema.dataclass_field(name='mime_type', schema=core_schema.with_default_schema(core_schema.str_schema(), default='')), + ]), + ]) + ).validate_python(data) + return BamlImagePy.baml_deserialize(data) + +def get_schema(): + # No validation + return core_schema.no_info_after_validator_function( + deserialize, + core_schema.any_schema(), + serialization=core_schema.plain_serializer_function_ser_schema( + lambda v: v.baml_serialize(), + ) + ) + +ret = get_schema() + "#; + // py.run(code, None, Some(ret_dict)); + let fun: Py = PyModule::from_code_bound(py, code, "", "")? + .getattr("ret")? + .into(); + Ok(fun.to_object(py)) + }) +} diff --git a/engine/language_client_python/src/types/mod.rs b/engine/language_client_python/src/types/mod.rs index 223a5f9a7..97d524fbc 100644 --- a/engine/language_client_python/src/types/mod.rs +++ b/engine/language_client_python/src/types/mod.rs @@ -6,6 +6,7 @@ pub(crate) mod client_registry; pub(crate) mod function_result_stream; pub(crate) mod function_results; pub(crate) mod image; +pub(super) mod media_repr; pub(crate) mod runtime_ctx_manager; pub(crate) mod span; pub(crate) mod trace_stats; From b10e99db66e63df4256ed198eaed388beda29d8b Mon Sep 17 00:00:00 2001 From: Sam Lijin Date: Fri, 18 Oct 2024 18:42:31 -0700 Subject: [PATCH 2/7] fix --- .../src/types/media_repr.rs | 32 ++++++++++++++----- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/engine/language_client_python/src/types/media_repr.rs b/engine/language_client_python/src/types/media_repr.rs index 0855ec245..babce7443 100644 --- a/engine/language_client_python/src/types/media_repr.rs +++ b/engine/language_client_python/src/types/media_repr.rs @@ -82,14 +82,30 @@ def deserialize(data): else: SchemaValidator( core_schema.union_schema([ - core_schema.dataclass_args_schema('baml.Image', [ - core_schema.dataclass_field(name='url', schema=core_schema.str_schema()), - core_schema.dataclass_field(name='mime_type', 
schema=core_schema.with_default_schema(core_schema.str_schema(), default='')), - ]), - core_schema.dataclass_args_schema('baml.Image', [ - core_schema.dataclass_field(name='base64', schema=core_schema.str_schema()), - core_schema.dataclass_field(name='mime_type', schema=core_schema.with_default_schema(core_schema.str_schema(), default='')), - ]), + core_schema.model_fields_schema({ + 'url': core_schema.model_field(core_schema.str_schema()), + 'mime_type': core_schema.model_field( + core_schema.with_default_schema( + core_schema.union_schema([ + core_schema.str_schema(), + core_schema.none_schema(), + ]), + default=None, + ), + ), + }), + core_schema.model_fields_schema({ + 'base64': core_schema.model_field(core_schema.str_schema()), + 'mime_type': core_schema.model_field( + core_schema.with_default_schema( + core_schema.union_schema([ + core_schema.str_schema(), + core_schema.none_schema(), + ]), + default=None, + ), + ), + }), ]) ).validate_python(data) return BamlImagePy.baml_deserialize(data) From 8ff9db294d405474a72964fa0236a51c52dd3b39 Mon Sep 17 00:00:00 2001 From: Sam Lijin Date: Fri, 18 Oct 2024 18:42:36 -0700 Subject: [PATCH 3/7] tests --- integ-tests/python/tests/test_pydantic.py | 72 +++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 integ-tests/python/tests/test_pydantic.py diff --git a/integ-tests/python/tests/test_pydantic.py b/integ-tests/python/tests/test_pydantic.py new file mode 100644 index 000000000..35d50b2ce --- /dev/null +++ b/integ-tests/python/tests/test_pydantic.py @@ -0,0 +1,72 @@ +import baml_py +import pydantic +import pytest + + +class Foo(pydantic.BaseModel): + my_image: baml_py.Image + + +def test_model_validate_success(): + foo_inst = Foo.model_validate( + {"my_image": {"url": "https://example.com/image.png"}} + ) + assert isinstance(foo_inst.my_image, baml_py.Image) + + foo_inst = Foo.model_validate( + {"my_image": {"url": "https://example.com/image.png", "mime_type": None}} + ) + assert 
isinstance(foo_inst.my_image, baml_py.Image) + + foo_inst = Foo.model_validate( + {"my_image": {"url": "https://example.com/image.png", "mime_type": "image/png"}} + ) + assert isinstance(foo_inst.my_image, baml_py.Image) + + foo_inst = Foo.model_validate( + {"my_image": {"base64": "iVBORw0KGgoAAAANSUhEUgAAAAUA"}} + ) + assert isinstance(foo_inst.my_image, baml_py.Image) + + foo_inst = Foo.model_validate( + { + "my_image": { + "base64": "iVBORw0KGgoAAAANSUhEUgAAAAUA", + "mime_type": None, + } + } + ) + assert isinstance(foo_inst.my_image, baml_py.Image) + + foo_inst = Foo.model_validate( + { + "my_image": { + "base64": "iVBORw0KGgoAAAANSUhEUgAAAAUA", + "mime_type": "image/png", + } + } + ) + assert isinstance(foo_inst.my_image, baml_py.Image) + + +def test_model_validate_failure(): + # assert that model validation produces a useful error + with pytest.raises(pydantic.ValidationError) as e: + Foo.model_validate({"my_image": {"not-a-url": "https://example.com/image.png"}}) + assert "my_image" in str(e.value) + assert "base64" in str(e.value) + assert "url" in str(e.value) + + +def test_model_dump(): + foo_inst = Foo(my_image=baml_py.Image.from_url("https://example.com/image.png")) + assert foo_inst.model_dump() == { + "my_image": {"url": "https://example.com/image.png"} + } + + foo_inst = Foo( + my_image=baml_py.Image.from_base64("image/png", "iVBORw0KGgoAAAANSUhEUgAAAAUA") + ) + assert foo_inst.model_dump() == { + "my_image": {"base64": "iVBORw0KGgoAAAANSUhEUgAAAAUA", "mime_type": "image/png"} + } From 618eede942e54b500b8bfa791c9dc4cb697f14d1 Mon Sep 17 00:00:00 2001 From: Sam Lijin Date: Fri, 18 Oct 2024 18:49:43 -0700 Subject: [PATCH 4/7] set more unused to deny --- engine/language_client_python/Cargo.toml | 2 ++ engine/language_client_python/src/errors.rs | 8 +------- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/engine/language_client_python/Cargo.toml b/engine/language_client_python/Cargo.toml index 92c6f3f26..78c110e25 100644 --- 
a/engine/language_client_python/Cargo.toml +++ b/engine/language_client_python/Cargo.toml @@ -13,6 +13,8 @@ crate-type = ["cdylib"] [lints.rust] dead_code = "deny" unused_imports = "deny" +unused_must_use = "deny" +unused_variables = "deny" [dependencies] anyhow.workspace = true diff --git a/engine/language_client_python/src/errors.rs b/engine/language_client_python/src/errors.rs index 96d528436..7d49bc68d 100644 --- a/engine/language_client_python/src/errors.rs +++ b/engine/language_client_python/src/errors.rs @@ -62,13 +62,7 @@ impl BamlError { } => { // Assuming ValidationError has fields that correspond to prompt, message, and raw_output // If not, you may need to adjust this part based on the actual structure of ValidationError - Python::with_gil(|py| { - raise_baml_validation_error( - prompt.clone(), - message.clone(), - raw_output.clone(), - ) - }) + raise_baml_validation_error(prompt.clone(), message.clone(), raw_output.clone()) } } } else if let Some(er) = err.downcast_ref::() { From ba4bfaeb71bf7627578c0d7694e1f5240a2a8547 Mon Sep 17 00:00:00 2001 From: Sam Lijin Date: Mon, 21 Oct 2024 10:31:40 -0700 Subject: [PATCH 5/7] use media_type --- .../src/types/media_repr.rs | 6 +++--- integ-tests/python/tests/test_pydantic.py | 18 +++++++++++++----- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/engine/language_client_python/src/types/media_repr.rs b/engine/language_client_python/src/types/media_repr.rs index babce7443..d6d860c5a 100644 --- a/engine/language_client_python/src/types/media_repr.rs +++ b/engine/language_client_python/src/types/media_repr.rs @@ -13,6 +13,7 @@ use serde::{Deserialize, Serialize}; #[derive(Debug, Serialize, Deserialize)] pub struct UserFacingBamlMedia { #[serde(skip_serializing_if = "Option::is_none")] + #[serde(rename = "media_type")] pub mime_type: Option, #[serde(flatten)] pub content: UserFacingBamlMediaContent, @@ -84,7 +85,7 @@ def deserialize(data): core_schema.union_schema([ core_schema.model_fields_schema({ 
'url': core_schema.model_field(core_schema.str_schema()), - 'mime_type': core_schema.model_field( + 'media_type': core_schema.model_field( core_schema.with_default_schema( core_schema.union_schema([ core_schema.str_schema(), @@ -96,7 +97,7 @@ def deserialize(data): }), core_schema.model_fields_schema({ 'base64': core_schema.model_field(core_schema.str_schema()), - 'mime_type': core_schema.model_field( + 'media_type': core_schema.model_field( core_schema.with_default_schema( core_schema.union_schema([ core_schema.str_schema(), @@ -111,7 +112,6 @@ def deserialize(data): return BamlImagePy.baml_deserialize(data) def get_schema(): - # No validation return core_schema.no_info_after_validator_function( deserialize, core_schema.any_schema(), diff --git a/integ-tests/python/tests/test_pydantic.py b/integ-tests/python/tests/test_pydantic.py index 35d50b2ce..0e0d36ecb 100644 --- a/integ-tests/python/tests/test_pydantic.py +++ b/integ-tests/python/tests/test_pydantic.py @@ -14,12 +14,17 @@ def test_model_validate_success(): assert isinstance(foo_inst.my_image, baml_py.Image) foo_inst = Foo.model_validate( - {"my_image": {"url": "https://example.com/image.png", "mime_type": None}} + {"my_image": {"url": "https://example.com/image.png", "media_type": None}} ) assert isinstance(foo_inst.my_image, baml_py.Image) foo_inst = Foo.model_validate( - {"my_image": {"url": "https://example.com/image.png", "mime_type": "image/png"}} + { + "my_image": { + "url": "https://example.com/image.png", + "media_type": "image/png", + } + } ) assert isinstance(foo_inst.my_image, baml_py.Image) @@ -32,7 +37,7 @@ def test_model_validate_success(): { "my_image": { "base64": "iVBORw0KGgoAAAANSUhEUgAAAAUA", - "mime_type": None, + "media_type": None, } } ) @@ -42,7 +47,7 @@ def test_model_validate_success(): { "my_image": { "base64": "iVBORw0KGgoAAAANSUhEUgAAAAUA", - "mime_type": "image/png", + "media_type": "image/png", } } ) @@ -68,5 +73,8 @@ def test_model_dump(): 
my_image=baml_py.Image.from_base64("image/png", "iVBORw0KGgoAAAANSUhEUgAAAAUA") ) assert foo_inst.model_dump() == { - "my_image": {"base64": "iVBORw0KGgoAAAANSUhEUgAAAAUA", "mime_type": "image/png"} + "my_image": { + "base64": "iVBORw0KGgoAAAANSUhEUgAAAAUA", + "media_type": "image/png", + } } From 5f5f3c49a658730ce789f350bb76d7836c8a5d17 Mon Sep 17 00:00:00 2001 From: Sam Lijin Date: Mon, 21 Oct 2024 11:03:57 -0700 Subject: [PATCH 6/7] fix --- engine/language_client_python/src/types/media_repr.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/engine/language_client_python/src/types/media_repr.rs b/engine/language_client_python/src/types/media_repr.rs index d6d860c5a..e8d176b0c 100644 --- a/engine/language_client_python/src/types/media_repr.rs +++ b/engine/language_client_python/src/types/media_repr.rs @@ -67,6 +67,12 @@ impl TryInto for &BamlMedia { /// - allows constructing Pydantic models containing a BamlImagePy instance /// - allows FastAPI requests to deserialize BamlImagePy instances in JSON format /// - allows serializing BamlImagePy instances in JSON format +/// +/// Ideally this belongs in baml_py.internal_monkeypatch, so that we can get +/// ruff-based type checking, but this depends on the pydantic libraries, so we +/// can't implement this in internal_monkeypatch without adding a hard dependency +/// on pydantic. And we don't want to do _that_, because that will make it harder +/// to implement output_type python/vanilla in the future. 
pub fn __get_pydantic_core_schema__( _cls: Bound<'_, PyType>, _source_type: Bound<'_, PyAny>, From 58a44f8f2aea85f5ce74b626da7e80d9d4c8c04e Mon Sep 17 00:00:00 2001 From: Sam Lijin Date: Mon, 21 Oct 2024 12:15:51 -0700 Subject: [PATCH 7/7] fix build --- docs/docs/snippets/supported-types.mdx | 4 ++-- engine/language_client_python/src/types/function_results.rs | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/docs/snippets/supported-types.mdx b/docs/docs/snippets/supported-types.mdx index c376fcee5..07f6c6ac2 100644 --- a/docs/docs/snippets/supported-types.mdx +++ b/docs/docs/snippets/supported-types.mdx @@ -145,7 +145,7 @@ If using Pydantic, the following are valid ways to construct the `Image` type. ```json { "url": "https://upload.wikimedia.org/wikipedia/en/4/4d/Shrek_%28character%29.png", - "mime_type": "image/png" + "media_type": "image/png" } ``` @@ -158,7 +158,7 @@ If using Pydantic, the following are valid ways to construct the `Image` type. ```json { "base64": "iVBORw0K....", - "mime_type": "image/png" + "media_type": "image/png" } ``` diff --git a/engine/language_client_python/src/types/function_results.rs b/engine/language_client_python/src/types/function_results.rs index e25b80d97..bbd483b57 100644 --- a/engine/language_client_python/src/types/function_results.rs +++ b/engine/language_client_python/src/types/function_results.rs @@ -1,8 +1,7 @@ use baml_types::BamlValue; use pyo3::prelude::{pymethods, PyResult}; -use pyo3::types::{PyAnyMethods, PyListMethods, PyModule}; +use pyo3::types::{PyAnyMethods, PyModule}; use pyo3::{Bound, IntoPy, PyObject, Python}; -use pythonize::pythonize; use crate::errors::BamlError;