diff --git a/datafusion/functions/src/string/concat.rs b/datafusion/functions/src/string/concat.rs index c8da67c18672..e67454125328 100644 --- a/datafusion/functions/src/string/concat.rs +++ b/datafusion/functions/src/string/concat.rs @@ -204,7 +204,9 @@ impl ScalarUDFImpl for ConcatFunc { DataType::Utf8View => { let string_array = as_string_view_array(array)?; - data_size += string_array.len(); + // This is an estimate; in particular, it will + // undercount arrays of short strings (<= 12 bytes). + data_size += string_array.total_buffer_bytes_used(); let column = if array.is_nullable() { ColumnarValueRef::NullableStringViewArray(string_array) } else { diff --git a/datafusion/functions/src/string/concat_ws.rs b/datafusion/functions/src/string/concat_ws.rs index ee62c36c0450..9d3b32eedf8f 100644 --- a/datafusion/functions/src/string/concat_ws.rs +++ b/datafusion/functions/src/string/concat_ws.rs @@ -247,6 +247,8 @@ impl ScalarUDFImpl for ConcatWsFunc { DataType::Utf8View => { let string_array = as_string_view_array(array)?; + // This is an estimate; in particular, it will + // undercount arrays of short strings (<= 12 bytes). data_size += string_array.total_buffer_bytes_used(); let column = if array.is_nullable() { ColumnarValueRef::NullableStringViewArray(string_array) diff --git a/datafusion/functions/src/strings.rs b/datafusion/functions/src/strings.rs index a7be3ef79299..cfddf57b094b 100644 --- a/datafusion/functions/src/strings.rs +++ b/datafusion/functions/src/strings.rs @@ -152,43 +152,34 @@ impl StringViewArrayBuilder { } ColumnarValueRef::NullableArray(array) => { if !CHECK_VALID || array.is_valid(i) { - self.block.push_str( - std::str::from_utf8(array.value(i).as_bytes()).unwrap(), - ); + self.block.push_str(array.value(i)); } } ColumnarValueRef::NullableLargeStringArray(array) => { if !CHECK_VALID || array.is_valid(i) { - self.block.push_str( - std::str::from_utf8(array.value(i).as_bytes()).unwrap(), - ); + self.block.push_str(array.value(i)); } } ColumnarValueRef::NullableStringViewArray(array) => { if !CHECK_VALID || array.is_valid(i) { - self.block.push_str( - std::str::from_utf8(array.value(i).as_bytes()).unwrap(), - ); + self.block.push_str(array.value(i)); } } ColumnarValueRef::NonNullableArray(array) => { - self.block - .push_str(std::str::from_utf8(array.value(i).as_bytes()).unwrap()); + self.block.push_str(array.value(i)); } ColumnarValueRef::NonNullableLargeStringArray(array) => { - self.block - .push_str(std::str::from_utf8(array.value(i).as_bytes()).unwrap()); + self.block.push_str(array.value(i)); } ColumnarValueRef::NonNullableStringViewArray(array) => { - self.block - .push_str(std::str::from_utf8(array.value(i).as_bytes()).unwrap()); + self.block.push_str(array.value(i)); } } } pub fn append_offset(&mut self) { self.builder.append_value(&self.block); - self.block = String::new(); + self.block.clear(); } pub fn finish(mut self) -> StringViewArray {