Skip to content

Commit b555eb0

Browse files
committed
Revert "feat: Support faster multi-column grouping ( GroupColumn) for Date/Time/Timestamp types (apache#13457)"
This reverts commit ecc04d4.
1 parent e607983 commit b555eb0

File tree

3 files changed

+3
-263
lines changed

3 files changed

+3
-263
lines changed

datafusion/physical-plan/src/aggregates/group_values/mod.rs

-28
Original file line number | Diff line number | Diff line change
@@ -18,13 +18,7 @@
1818
//! [`GroupValues`] trait for storing and interning group keys
1919
2020
use arrow::record_batch::RecordBatch;
21-
use arrow_array::types::{
22-
Date32Type, Date64Type, Time32MillisecondType, Time32SecondType,
23-
Time64MicrosecondType, Time64NanosecondType, TimestampMicrosecondType,
24-
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
25-
};
2621
use arrow_array::{downcast_primitive, ArrayRef};
27-
use arrow_schema::TimeUnit;
2822
use arrow_schema::{DataType, SchemaRef};
2923
use datafusion_common::Result;
3024

@@ -148,28 +142,6 @@ pub(crate) fn new_group_values(
148142
}
149143

150144
match d {
151-
DataType::Date32 => {
152-
downcast_helper!(Date32Type, d);
153-
}
154-
DataType::Date64 => {
155-
downcast_helper!(Date64Type, d);
156-
}
157-
DataType::Time32(t) => match t {
158-
TimeUnit::Second => downcast_helper!(Time32SecondType, d),
159-
TimeUnit::Millisecond => downcast_helper!(Time32MillisecondType, d),
160-
_ => {}
161-
},
162-
DataType::Time64(t) => match t {
163-
TimeUnit::Microsecond => downcast_helper!(Time64MicrosecondType, d),
164-
TimeUnit::Nanosecond => downcast_helper!(Time64NanosecondType, d),
165-
_ => {}
166-
},
167-
DataType::Timestamp(t, _) => match t {
168-
TimeUnit::Second => downcast_helper!(TimestampSecondType, d),
169-
TimeUnit::Millisecond => downcast_helper!(TimestampMillisecondType, d),
170-
TimeUnit::Microsecond => downcast_helper!(TimestampMicrosecondType, d),
171-
TimeUnit::Nanosecond => downcast_helper!(TimestampNanosecondType, d),
172-
},
173145
DataType::Utf8 => {
174146
return Ok(Box::new(GroupValuesByes::<i32>::new(OutputType::Utf8)));
175147
}

datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs

+3-39
Original file line number | Diff line number | Diff line change
@@ -32,14 +32,12 @@ use ahash::RandomState;
3232
use arrow::compute::cast;
3333
use arrow::datatypes::{
3434
BinaryViewType, Date32Type, Date64Type, Float32Type, Float64Type, Int16Type,
35-
Int32Type, Int64Type, Int8Type, StringViewType, Time32MillisecondType,
36-
Time32SecondType, Time64MicrosecondType, Time64NanosecondType,
37-
TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
38-
TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
35+
Int32Type, Int64Type, Int8Type, StringViewType, UInt16Type, UInt32Type, UInt64Type,
36+
UInt8Type,
3937
};
4038
use arrow::record_batch::RecordBatch;
4139
use arrow_array::{Array, ArrayRef};
42-
use arrow_schema::{DataType, Schema, SchemaRef, TimeUnit};
40+
use arrow_schema::{DataType, Schema, SchemaRef};
4341
use datafusion_common::hash_utils::create_hashes;
4442
use datafusion_common::{not_impl_err, DataFusionError, Result};
4543
use datafusion_execution::memory_pool::proxy::{RawTableAllocExt, VecAllocExt};
@@ -915,38 +913,6 @@ impl<const STREAMING: bool> GroupValues for GroupValuesColumn<STREAMING> {
915913
}
916914
&DataType::Date32 => instantiate_primitive!(v, nullable, Date32Type),
917915
&DataType::Date64 => instantiate_primitive!(v, nullable, Date64Type),
918-
&DataType::Time32(t) => match t {
919-
TimeUnit::Second => {
920-
instantiate_primitive!(v, nullable, Time32SecondType)
921-
}
922-
TimeUnit::Millisecond => {
923-
instantiate_primitive!(v, nullable, Time32MillisecondType)
924-
}
925-
_ => {}
926-
},
927-
&DataType::Time64(t) => match t {
928-
TimeUnit::Microsecond => {
929-
instantiate_primitive!(v, nullable, Time64MicrosecondType)
930-
}
931-
TimeUnit::Nanosecond => {
932-
instantiate_primitive!(v, nullable, Time64NanosecondType)
933-
}
934-
_ => {}
935-
},
936-
&DataType::Timestamp(t, _) => match t {
937-
TimeUnit::Second => {
938-
instantiate_primitive!(v, nullable, TimestampSecondType)
939-
}
940-
TimeUnit::Millisecond => {
941-
instantiate_primitive!(v, nullable, TimestampMillisecondType)
942-
}
943-
TimeUnit::Microsecond => {
944-
instantiate_primitive!(v, nullable, TimestampMicrosecondType)
945-
}
946-
TimeUnit::Nanosecond => {
947-
instantiate_primitive!(v, nullable, TimestampNanosecondType)
948-
}
949-
},
950916
&DataType::Utf8 => {
951917
let b = ByteGroupValueBuilder::<i32>::new(OutputType::Utf8);
952918
v.push(Box::new(b) as _)
@@ -1159,8 +1125,6 @@ fn supported_type(data_type: &DataType) -> bool {
11591125
| DataType::LargeBinary
11601126
| DataType::Date32
11611127
| DataType::Date64
1162-
| DataType::Time32(_)
1163-
| DataType::Timestamp(_, _)
11641128
| DataType::Utf8View
11651129
| DataType::BinaryView
11661130
)

datafusion/sqllogictest/test_files/group_by.slt

-196
Original file line number | Diff line number | Diff line change
@@ -5272,201 +5272,6 @@ drop view t
52725272
statement ok
52735273
drop table source;
52745274

5275-
# Test multi group by int + Date32
5276-
statement ok
5277-
create table source as values
5278-
(1, '2020-01-01'),
5279-
(1, '2020-01-01'),
5280-
(2, '2020-01-02'),
5281-
(2, '2020-01-03'),
5282-
(3, '2020-01-04'),
5283-
(3, '2020-01-04'),
5284-
(2, '2020-01-03'),
5285-
(null, null),
5286-
(null, '2020-01-01'),
5287-
(null, null),
5288-
(null, '2020-01-01'),
5289-
(2, '2020-01-02'),
5290-
(2, '2020-01-02'),
5291-
(1, null)
5292-
;
5293-
5294-
statement ok
5295-
create view t as select column1 as a, arrow_cast(column2, 'Date32') as b from source;
5296-
5297-
query IDI
5298-
select a, b, count(*) from t group by a, b order by a, b;
5299-
----
5300-
1 2020-01-01 2
5301-
1 NULL 1
5302-
2 2020-01-02 3
5303-
2 2020-01-03 2
5304-
3 2020-01-04 2
5305-
NULL 2020-01-01 2
5306-
NULL NULL 2
5307-
5308-
statement ok
5309-
drop view t
5310-
5311-
statement ok
5312-
drop table source;
5313-
5314-
# Test multi group by int + Date64
5315-
statement ok
5316-
create table source as values
5317-
(1, '2020-01-01'),
5318-
(1, '2020-01-01'),
5319-
(2, '2020-01-02'),
5320-
(2, '2020-01-03'),
5321-
(3, '2020-01-04'),
5322-
(3, '2020-01-04'),
5323-
(2, '2020-01-03'),
5324-
(null, null),
5325-
(null, '2020-01-01'),
5326-
(null, null),
5327-
(null, '2020-01-01'),
5328-
(2, '2020-01-02'),
5329-
(2, '2020-01-02'),
5330-
(1, null)
5331-
;
5332-
5333-
statement ok
5334-
create view t as select column1 as a, arrow_cast(column2, 'Date64') as b from source;
5335-
5336-
query IDI
5337-
select a, b, count(*) from t group by a, b order by a, b;
5338-
----
5339-
1 2020-01-01T00:00:00 2
5340-
1 NULL 1
5341-
2 2020-01-02T00:00:00 3
5342-
2 2020-01-03T00:00:00 2
5343-
3 2020-01-04T00:00:00 2
5344-
NULL 2020-01-01T00:00:00 2
5345-
NULL NULL 2
5346-
5347-
statement ok
5348-
drop view t
5349-
5350-
statement ok
5351-
drop table source;
5352-
5353-
# Test multi group by int + Time32
5354-
statement ok
5355-
create table source as values
5356-
(1, '12:34:56'),
5357-
(1, '12:34:56'),
5358-
(2, '13:00:00'),
5359-
(2, '14:15:00'),
5360-
(3, '23:59:59'),
5361-
(3, '23:59:59'),
5362-
(2, '14:15:00'),
5363-
(null, null),
5364-
(null, '12:00:00'),
5365-
(null, null),
5366-
(null, '12:00:00'),
5367-
(2, '13:00:00'),
5368-
(2, '13:00:00'),
5369-
(1, null)
5370-
;
5371-
5372-
statement ok
5373-
create view t as select column1 as a, arrow_cast(column2, 'Time32(Second)') as b from source;
5374-
5375-
query IDI
5376-
select a, b, count(*) from t group by a, b order by a, b;
5377-
----
5378-
1 12:34:56 2
5379-
1 NULL 1
5380-
2 13:00:00 3
5381-
2 14:15:00 2
5382-
3 23:59:59 2
5383-
NULL 12:00:00 2
5384-
NULL NULL 2
5385-
5386-
statement ok
5387-
drop view t
5388-
5389-
statement ok
5390-
drop table source;
5391-
5392-
# Test multi group by int + Time64
5393-
statement ok
5394-
create table source as values
5395-
(1, '12:34:56.123456'),
5396-
(1, '12:34:56.123456'),
5397-
(2, '13:00:00.000001'),
5398-
(2, '14:15:00.999999'),
5399-
(3, '23:59:59.500000'),
5400-
(3, '23:59:59.500000'),
5401-
(2, '14:15:00.999999'),
5402-
(null, null),
5403-
(null, '12:00:00.000000'),
5404-
(null, null),
5405-
(null, '12:00:00.000000'),
5406-
(2, '13:00:00.000001'),
5407-
(2, '13:00:00.000001'),
5408-
(1, null)
5409-
;
5410-
5411-
statement ok
5412-
create view t as select column1 as a, arrow_cast(column2, 'Time64(Microsecond)') as b from source;
5413-
5414-
query IDI
5415-
select a, b, count(*) from t group by a, b order by a, b;
5416-
----
5417-
1 12:34:56.123456 2
5418-
1 NULL 1
5419-
2 13:00:00.000001 3
5420-
2 14:15:00.999999 2
5421-
3 23:59:59.500 2
5422-
NULL 12:00:00 2
5423-
NULL NULL 2
5424-
5425-
statement ok
5426-
drop view t
5427-
5428-
statement ok
5429-
drop table source;
5430-
5431-
# Test multi group by int + Timestamp
5432-
statement ok
5433-
create table source as values
5434-
(1, '2020-01-01 12:34:56'),
5435-
(1, '2020-01-01 12:34:56'),
5436-
(2, '2020-01-02 13:00:00'),
5437-
(2, '2020-01-03 14:15:00'),
5438-
(3, '2020-01-04 23:59:59'),
5439-
(3, '2020-01-04 23:59:59'),
5440-
(2, '2020-01-03 14:15:00'),
5441-
(null, null),
5442-
(null, '2020-01-01 12:00:00'),
5443-
(null, null),
5444-
(null, '2020-01-01 12:00:00'),
5445-
(2, '2020-01-02 13:00:00'),
5446-
(2, '2020-01-02 13:00:00'),
5447-
(1, null)
5448-
;
5449-
5450-
statement ok
5451-
create view t as select column1 as a, arrow_cast(column2, 'Timestamp(Nanosecond, None)') as b from source;
5452-
5453-
query IPI
5454-
select a, b, count(*) from t group by a, b order by a, b;
5455-
----
5456-
1 2020-01-01T12:34:56 2
5457-
1 NULL 1
5458-
2 2020-01-02T13:00:00 3
5459-
2 2020-01-03T14:15:00 2
5460-
3 2020-01-04T23:59:59 2
5461-
NULL 2020-01-01T12:00:00 2
5462-
NULL NULL 2
5463-
5464-
statement ok
5465-
drop view t
5466-
5467-
statement ok
5468-
drop table source;
5469-
54705275
# Test whether min, max accumulator produces NaN result when input is NaN.
54715276
# See https://github.com/apache/datafusion/issues/13415 for rationale
54725277
statement ok
@@ -5482,4 +5287,3 @@ query RR
54825287
SELECT max(input_table.x), min(input_table.x) from input_table GROUP BY input_table."row";
54835288
----
54845289
NaN NaN
5485-

0 commit comments

Comments
 (0)