
feat(types): support ns timestamp #19827

Merged · 16 commits · Dec 30, 2024
26 changes: 26 additions & 0 deletions Cargo.lock


112 changes: 112 additions & 0 deletions e2e_test/batch/types/timestamp_ns.slt.part
@@ -0,0 +1,112 @@
statement ok
SET RW_IMPLICIT_FLUSH TO true;

statement ok
create table t1(v1 int, v2 timestamp);

statement ok
insert into t1 values(1,'2013-01-01 01:01:01.123456789'),(2,'2012-01-01 01:01:01.123456'),(3,'0000-01-01 01:01:01.123456789'),(4,'2213-01-01 01:01:01.123456789'),(5,null);

query T
select * from t1;
----
3 0001-01-01 01:01:01.123456789 BC
5 null
1 2013-01-01 01:01:01.123456789
4 2213-01-01 01:01:01.123456789
2 2012-01-01 01:01:01.123456

query T
select * from t1 where v1 is null;
----
5 null

query T
select v1, v2,
case
when extract(year from v2) < 2000 then 'Before 2000'
when extract(year from v2) >= 2000 and extract(year from v2) < 2100 then '21st Century'
else 'Future'
end as time_period
from t1;
----
2 2012-01-01 01:01:01.123456 21st Century
1 2013-01-01 01:01:01.123456789 21st Century
4 2213-01-01 01:01:01.123456789 Future
3 0001-01-01 01:01:01.123456789 BC Before 2000
5 null Future

query T
select v1, v2, coalesce(v2, '1900-01-01 00:00:00') as coalesce_v2 from t1;
----
3 0001-01-01 01:01:01.123456789 BC 0001-01-01 01:01:01.123456789 BC
5 null 1900-01-01 00:00:00
1 2013-01-01 01:01:01.123456789 2013-01-01 01:01:01.123456789
4 2213-01-01 01:01:01.123456789 2213-01-01 01:01:01.123456789
2 2012-01-01 01:01:01.123456 2012-01-01 01:01:01.123456

query T
select count(v2) as total_rows from t1;
----
4

query T
select * from t1 order by v2;
----
3 0001-01-01 01:01:01.123456789 BC
2 2012-01-01 01:01:01.123456
1 2013-01-01 01:01:01.123456789
4 2213-01-01 01:01:01.123456789
5 null

query T
select * from t1 where v2 >= '2012-01-01 01:01:01.123456';
----
2 2012-01-01 01:01:01.123456
1 2013-01-01 01:01:01.123456789
4 2213-01-01 01:01:01.123456789

query T
select v1, cast(v2 as date) as date_v2, cast(v2 as timestamp with time zone) as timestamptz_v2 from t1;
----
3 0001-01-01 BC 0001-01-01 01:01:01.123456+00:00 BC
5 null null
1 2013-01-01 2013-01-01 01:01:01.123456+00:00
4 2213-01-01 2213-01-01 01:01:01.123456+00:00
2 2012-01-01 2012-01-01 01:01:01.123456+00:00

query T
select v1, date_trunc('day', v2) AS truncated_v2 from t1;
----
3 0001-01-01 00:00:00 BC
5 null
2 2012-01-01 00:00:00
1 2013-01-01 00:00:00
4 2213-01-01 00:00:00

query T
select v1, v2 at time zone 'UTC' as v2_utc from t1;
----
3 0001-01-01 01:01:01.123456+00:00 BC
5 null
1 2013-01-01 01:01:01.123456+00:00
4 2213-01-01 01:01:01.123456+00:00
2 2012-01-01 01:01:01.123456+00:00

query T
select v1, to_char(v2, 'YYYY-MM-DD HH24:MI:SS.NS') as formatted_v2 from t1;
----
3 0001-01-01 01:01:01.123456789 BC
5 null
1 2013-01-01 01:01:01.123456000
4 2213-01-01 01:01:01.123456789
2 2012-01-01 01:01:01.123456000

query T
select generate_series('2013-01-01 01:01:01.123456789'::timestamp,'2013-01-01 01:01:05.123456790'::timestamp, '1 s');
----
2013-01-01 01:01:01.123456789
2013-01-01 01:01:02.123456789
2013-01-01 01:01:03.123456789
2013-01-01 01:01:04.123456789
2013-01-01 01:01:05.123456789
1 change: 1 addition & 0 deletions src/common/Cargo.toml
@@ -56,6 +56,7 @@ humantime = "2.1"
hytra = { workspace = true }
itertools = { workspace = true }
itoa = "1.0"
jiff = "0.1.15"
jsonbb = { workspace = true }
lru = { workspace = true }
memcomparable = { version = "0.2", features = ["decimal"] }
108 changes: 76 additions & 32 deletions src/common/src/types/datetime.rs
@@ -168,29 +168,18 @@ impl FromStr for Timestamp {
type Err = InvalidParamsError;

fn from_str(s: &str) -> Result<Self> {
if let Ok(res) = speedate::DateTime::parse_str_rfc3339(s) {
if res.time.tz_offset.is_some() {
return Err(ErrorKind::ParseTimestamp.into());
}
Ok(Date::from_ymd_uncheck(
res.date.year as i32,
res.date.month as u32,
res.date.day as u32,
)
.and_hms_micro_uncheck(
res.time.hour as u32,
res.time.minute as u32,
res.time.second as u32,
res.time.microsecond,
))
} else {
let res =
speedate::Date::parse_str_rfc3339(s).map_err(|_| ErrorKind::ParseTimestamp)?;
Ok(
Date::from_ymd_uncheck(res.year as i32, res.month as u32, res.day as u32)
.and_hms_micro_uncheck(0, 0, 0, 0),
)
}
let dt = s
.parse::<jiff::civil::DateTime>()
Contributor: Why do we need to introduce yet another dependency in addition to chrono and speedate?

Contributor (author): Because chrono can't parse this, and speedate only parses down to microsecond (us) precision.
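For illustration, a minimal sketch (assuming jiff 0.1's `FromStr` impl for `civil::DateTime`; not part of this diff) of the nanosecond precision that speedate would truncate:

```rust
// Hypothetical standalone check, not part of this PR.
fn main() {
    let dt: jiff::civil::DateTime = "2013-01-01 01:01:01.123456789".parse().unwrap();
    assert_eq!(dt.subsec_nanosecond(), 123_456_789); // full nanoseconds retained
}
```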

.map_err(|_| ErrorKind::ParseTimestamp)?;
Contributor: The error message for ParseTimestamp should be updated from "up to 6 digits" to "up to 9 digits".

Ok(
Date::from_ymd_uncheck(dt.year() as i32, dt.month() as u32, dt.day() as u32)
.and_hms_nano_uncheck(
dt.hour() as u32,
dt.minute() as u32,
dt.second() as u32,
dt.subsec_nanosecond() as u32,
),
)
}
}

@@ -422,6 +411,13 @@ impl Date {
.and_time(Time::from_hms_micro_uncheck(hour, min, sec, micro).0),
)
}

pub fn and_hms_nano_uncheck(self, hour: u32, min: u32, sec: u32, nano: u32) -> Timestamp {
Timestamp::new(
self.0
.and_time(Time::from_hms_nano_uncheck(hour, min, sec, nano).0),
)
}
}

impl Time {
@@ -485,6 +481,38 @@ impl Time {
}
}

/// The first `i64` of a `Timestamp`'s wire representation, covering both the old and the new format.
/// The highest 2 bits act as a format tag: old-format microsecond values (which stay well within ±2^62 µs) keep their natural top bits (`0b00` for non-negative, `0b11` for negative), while new-format second values are XOR'd with `0b01 << 62`, giving top bits `0b01` or `0b10`.
/// The variants hold the untagged value; the tag is applied in `to_protobuf` and stripped in `from_protobuf`.
enum FirstI64 {
V0 { usecs: i64 },
V1 { secs: i64 },
}
impl FirstI64 {
pub fn to_protobuf(&self) -> i64 {
match self {
FirstI64::V0 { usecs } => *usecs,
FirstI64::V1 { secs } => secs ^ (0b01 << 62),
}
}

pub fn from_protobuf(cur: &mut Cursor<&[u8]>) -> ArrayResult<FirstI64> {
let value = cur
.read_i64::<BigEndian>()
.context("failed to read i64 from Time buffer")?;
if Self::is_v1_format_state(value) {
let secs = value ^ (0b01 << 62);
Ok(FirstI64::V1 { secs })
} else {
Ok(FirstI64::V0 { usecs: value })
}
}

fn is_v1_format_state(value: i64) -> bool {
let state = (value >> 62) & 0b11;
state == 0b10 || state == 0b01
}
}
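
// Illustrative sketch only (not part of this PR's diff): how the 2-bit tag separates
// the two formats. A new-format value stores the seconds XOR'd with `0b01 << 62`, so
// its top two bits become 0b01 (non-negative seconds) or 0b10 (negative seconds);
// old-format microsecond values within the accepted range keep top bits 0b00 or 0b11.
#[cfg(test)]
fn _first_i64_tag_sketch() {
    let secs: i64 = 1_357_002_061; // 2013-01-01 01:01:01 UTC
    let encoded = FirstI64::V1 { secs }.to_protobuf();
    assert_eq!((encoded >> 62) & 0b11, 0b01); // tagged as the new format
    assert_eq!(encoded ^ (0b01 << 62), secs); // XOR with the tag restores secs
}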

impl Timestamp {
pub fn with_secs_nsecs(secs: i64, nsecs: u32) -> Result<Self> {
Ok(Timestamp::new({
@@ -495,18 +523,34 @@ }
}

pub fn from_protobuf(cur: &mut Cursor<&[u8]>) -> ArrayResult<Timestamp> {
let micros = cur
.read_i64::<BigEndian>()
.context("failed to read i64 from Timestamp buffer")?;

Ok(Timestamp::with_micros(micros)?)
match FirstI64::from_protobuf(cur)? {
FirstI64::V0 { usecs } => Ok(Timestamp::with_micros(usecs)?),
FirstI64::V1 { secs } => {
let nsecs = cur
.read_u32::<BigEndian>()
.context("failed to read u32 from Time buffer")?;
Ok(Timestamp::with_secs_nsecs(secs, nsecs)?)
}
}
}

/// Although `Timestamp` takes 12 bytes, we drop 4 bytes in protobuf encoding.
// Since the timestamp's seconds value is far smaller than the i64 range, we use the highest 2 bits to store the format information, which keeps the encoding compatible with the old format.
// New format: secs(i64) + nsecs(u32)
// Old format: micros(i64)
pub fn to_protobuf<T: Write>(self, output: &mut T) -> ArrayResult<usize> {
output
.write(&(self.0.and_utc().timestamp_micros()).to_be_bytes())
.map_err(Into::into)
let timestamp_size = output
.write(
&(FirstI64::V1 {
secs: self.0.and_utc().timestamp(),
}
.to_protobuf())
.to_be_bytes(),
)
.map_err(Into::<ArrayError>::into)?;
let timestamp_subsec_nanos_size = output
.write(&(self.0.and_utc().timestamp_subsec_nanos()).to_be_bytes())
.map_err(Into::<ArrayError>::into)?;
Ok(timestamp_subsec_nanos_size + timestamp_size)
}
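
// Illustrative sketch only (not part of this PR's diff): nanoseconds survive a
// `to_protobuf` / `from_protobuf` round trip in the new 12-byte encoding.
// Assumes `Timestamp` derives `PartialEq` and `Debug`.
#[cfg(test)]
fn _nanosecond_roundtrip_sketch() {
    use std::io::Cursor;
    let ts = Timestamp::with_secs_nsecs(1_357_002_061, 123_456_789).unwrap();
    let mut buf = Vec::new();
    ts.to_protobuf(&mut buf).unwrap(); // writes 8 tagged bytes of secs + 4 bytes of nsecs
    assert_eq!(buf.len(), 12);
    let decoded = Timestamp::from_protobuf(&mut Cursor::new(buf.as_slice())).unwrap();
    assert_eq!(decoded, Timestamp::with_secs_nsecs(1_357_002_061, 123_456_789).unwrap());
}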

pub fn get_timestamp_nanos(&self) -> i64 {
2 changes: 2 additions & 0 deletions src/expr/impl/src/scalar/to_char.rs
@@ -87,6 +87,8 @@ impl ChronoPattern {
("Mon", "%b"),
("DD", "%d"),
("dd", "%d"),
("NS", "%9f"),
("ns", "%9f"),
("US", "%6f"),
("us", "%6f"),
("MS", "%3f"),