diff --git a/crates/deltalake-core/src/kernel/arrow/mod.rs b/crates/deltalake-core/src/kernel/arrow/mod.rs
index bfbfb76b7b..ab121ee8a6 100644
--- a/crates/deltalake-core/src/kernel/arrow/mod.rs
+++ b/crates/deltalake-core/src/kernel/arrow/mod.rs
@@ -564,11 +564,13 @@ fn max_min_schema_for_fields(dest: &mut Vec, f: &ArrowField) {
                 max_min_schema_for_fields(&mut child_dest, f);
             }
 
-            dest.push(ArrowField::new(
-                f.name(),
-                ArrowDataType::Struct(child_dest.into()),
-                true,
-            ));
+            if !child_dest.is_empty() {
+                dest.push(ArrowField::new(
+                    f.name(),
+                    ArrowDataType::Struct(child_dest.into()),
+                    true,
+                ));
+            }
         }
         // don't compute min or max for list, map or binary types
         ArrowDataType::List(_) | ArrowDataType::Map(_, _) | ArrowDataType::Binary => { /* noop */ }
diff --git a/crates/deltalake-core/src/protocol/checkpoints.rs b/crates/deltalake-core/src/protocol/checkpoints.rs
index e4a155e477..b6787b9b31 100644
--- a/crates/deltalake-core/src/protocol/checkpoints.rs
+++ b/crates/deltalake-core/src/protocol/checkpoints.rs
@@ -522,7 +522,8 @@ fn apply_stats_conversion(
 mod tests {
     use std::sync::Arc;
 
-    use arrow_array::{ArrayRef, RecordBatch};
+    use arrow_array::builder::{Int32Builder, ListBuilder, StructBuilder};
+    use arrow_array::{ArrayRef, Int32Array, RecordBatch};
     use arrow_schema::Schema as ArrowSchema;
     use chrono::Duration;
     use lazy_static::lazy_static;
@@ -903,6 +904,43 @@ mod tests {
         );
     }
 
+    #[tokio::test]
+    async fn test_struct_with_single_list_field() {
+        // A second column is needed: otherwise the entire stats struct is empty,
+        // which also makes the parquet write fail during checkpoint creation.
+        let other_column_array: ArrayRef = Arc::new(Int32Array::from(vec![1]));
+
+        let mut list_item_builder = Int32Builder::new();
+        list_item_builder.append_value(1);
+
+        let mut list_in_struct_builder = ListBuilder::new(list_item_builder);
+        list_in_struct_builder.append(true);
+
+        let mut struct_builder = StructBuilder::new(
+            vec![arrow_schema::Field::new(
+                "list_in_struct",
+                arrow_schema::DataType::List(Arc::new(arrow_schema::Field::new(
+                    "item",
+                    arrow_schema::DataType::Int32,
+                    true,
+                ))),
+                true,
+            )],
+            vec![Box::new(list_in_struct_builder)],
+        );
+        struct_builder.append(true);
+
+        let struct_with_list_array: ArrayRef = Arc::new(struct_builder.finish());
+        let batch = RecordBatch::try_from_iter(vec![
+            ("other_column", other_column_array),
+            ("struct_with_list", struct_with_list_array),
+        ])
+        .unwrap();
+        let table = DeltaOps::new_in_memory().write(vec![batch]).await.unwrap();
+
+        create_checkpoint(&table).await.unwrap();
+    }
+
     lazy_static! {
         static ref SCHEMA: Value = json!({
             "type": "struct",