From 10c6b5c160301633db857617ed49adffe4dfda6a Mon Sep 17 00:00:00 2001 From: Jonas Irgens Kylling Date: Sat, 19 Oct 2024 17:30:56 +0200 Subject: [PATCH] fix: improve errors on field cast failures (#2932) # Description Adds information on the field, to-type and from-type when casting fails. We could consider using our own error type for the casting errors to allow unrolling errors to get the full path to a field. Currently we only give the last part of the path. When looking at `cast_field` I noticed that we might be missing a match on `(DataType::List(_), DataType::LargeList(_))`. Casting List to LargeList can currently cause some tricky behaviour. I had a record batch with a List type, and tried reading it with a LargeList schema. For some choices of schemas it failed with an error message, for other schemas it did not fail, but read the columns in the wrong order. Signed-off-by: R. Tyler Croy --- crates/core/src/operations/cast/mod.rs | 19 ++++++++++++++++++- python/tests/test_writer.py | 6 ++++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/crates/core/src/operations/cast/mod.rs b/crates/core/src/operations/cast/mod.rs index 554373e623..278cb2bbfa 100644 --- a/crates/core/src/operations/cast/mod.rs +++ b/crates/core/src/operations/cast/mod.rs @@ -144,7 +144,19 @@ fn cast_field( add_missing, )?) 
as ArrayRef), _ if is_cast_required(col_type, field_type) => { - cast_with_options(col, field_type, cast_options) + cast_with_options(col, field_type, cast_options).map_err(|err| { + if let ArrowError::CastError(err) = err { + ArrowError::CastError(format!( + "Failed to cast {} from {} to {}: {}", + field.name(), + field_type, + col_type, + err + )) + } else { + err + } + }) } _ => Ok(col.clone()), } @@ -337,6 +349,11 @@ mod tests { assert!(!is_cast_required(&field1, &field2)); } + #[test] + fn test_is_cast_required_with_smol_int() { + assert!(is_cast_required(&DataType::Int8, &DataType::Int32)); + } + #[test] fn test_is_cast_required_with_list_non_default_item() { let field1 = DataType::List(FieldRef::from(Field::new("item", DataType::Int32, false))); diff --git a/python/tests/test_writer.py b/python/tests/test_writer.py index c43e5d1136..a6662c48d6 100644 --- a/python/tests/test_writer.py +++ b/python/tests/test_writer.py @@ -273,7 +273,8 @@ def test_write_type_castable_types(existing_table: DeltaTable): engine="rust", ) with pytest.raises( - Exception, match="Cast error: Cannot cast string 'hello' to value of Int8 type" + Exception, + match="Cast error: Failed to cast int8 from Int8 to Utf8: Cannot cast string 'hello' to value of Int8 type", ): write_deltalake( existing_table, @@ -284,7 +285,8 @@ def test_write_type_castable_types(existing_table: DeltaTable): ) with pytest.raises( - Exception, match="Cast error: Can't cast value 1000 to type Int8" + Exception, + match="Cast error: Failed to cast int8 from Int8 to Int64: Can't cast value 1000 to type Int8", ): write_deltalake( existing_table,