Skip to content

Commit 8a0bd8b

Browse files
author
Cheng-Yuan-Lai
committed
doc-gen: migrate scalar functions (datetime) documentation 1/2
1 parent b9cef8c commit 8a0bd8b

File tree

6 files changed

+163
-203
lines changed

6 files changed

+163
-203
lines changed

datafusion/functions/src/datetime/current_date.rs

+11-19
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,21 @@ use arrow::datatypes::DataType::Date32;
2222
use chrono::{Datelike, NaiveDate};
2323

2424
use datafusion_common::{internal_err, Result, ScalarValue};
25-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
2625
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
2726
use datafusion_expr::{
2827
ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
2928
};
30-
use std::sync::OnceLock;
29+
use datafusion_macros::user_doc;
3130

31+
#[user_doc(
32+
doc_section(label = "Time and Date Functions"),
33+
description = r#"
34+
Returns the current UTC date.
35+
36+
The `current_date()` return value is determined at query time and will return the same date, no matter when in the query plan the function executes.
37+
"#,
38+
syntax_example = "current_date()"
39+
)]
3240
#[derive(Debug)]
3341
pub struct CurrentDateFunc {
3442
signature: Signature,
@@ -105,22 +113,6 @@ impl ScalarUDFImpl for CurrentDateFunc {
105113
}
106114

107115
fn documentation(&self) -> Option<&Documentation> {
108-
Some(get_current_date_doc())
116+
self.doc()
109117
}
110118
}
111-
112-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
113-
114-
fn get_current_date_doc() -> &'static Documentation {
115-
DOCUMENTATION.get_or_init(|| {
116-
Documentation::builder(
117-
DOC_SECTION_DATETIME,
118-
r#"
119-
Returns the current UTC date.
120-
121-
The `current_date()` return value is determined at query time and will return the same date, no matter when in the query plan the function executes.
122-
"#,
123-
"current_date()")
124-
.build()
125-
})
126-
}

datafusion/functions/src/datetime/current_time.rs

+11-19
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,23 @@ use arrow::datatypes::DataType;
1919
use arrow::datatypes::DataType::Time64;
2020
use arrow::datatypes::TimeUnit::Nanosecond;
2121
use std::any::Any;
22-
use std::sync::OnceLock;
2322

2423
use datafusion_common::{internal_err, Result, ScalarValue};
25-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
2624
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
2725
use datafusion_expr::{
2826
ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility,
2927
};
28+
use datafusion_macros::user_doc;
3029

30+
#[user_doc(
31+
doc_section(label = "Time and Date Functions"),
32+
description = r#"
33+
Returns the current UTC time.
34+
35+
The `current_time()` return value is determined at query time and will return the same time, no matter when in the query plan the function executes.
36+
"#,
37+
syntax_example = "current_time()"
38+
)]
3139
#[derive(Debug)]
3240
pub struct CurrentTimeFunc {
3341
signature: Signature,
@@ -93,22 +101,6 @@ impl ScalarUDFImpl for CurrentTimeFunc {
93101
}
94102

95103
fn documentation(&self) -> Option<&Documentation> {
96-
Some(get_current_time_doc())
104+
self.doc()
97105
}
98106
}
99-
100-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
101-
102-
fn get_current_time_doc() -> &'static Documentation {
103-
DOCUMENTATION.get_or_init(|| {
104-
Documentation::builder(
105-
DOC_SECTION_DATETIME,
106-
r#"
107-
Returns the current UTC time.
108-
109-
The `current_time()` return value is determined at query time and will return the same time, no matter when in the query plan the function executes.
110-
"#,
111-
"current_time()")
112-
.build()
113-
})
114-
}

datafusion/functions/src/datetime/date_bin.rs

+59-61
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
use std::any::Any;
19-
use std::sync::{Arc, OnceLock};
19+
use std::sync::Arc;
2020

2121
use arrow::array::temporal_conversions::NANOSECONDS;
2222
use arrow::array::types::{
@@ -37,10 +37,66 @@ use datafusion_expr::TypeSignature::Exact;
3737
use datafusion_expr::{
3838
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD,
3939
};
40+
use datafusion_macros::user_doc;
4041

4142
use chrono::{DateTime, Datelike, Duration, Months, TimeDelta, Utc};
42-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_DATETIME;
4343

44+
#[user_doc(
45+
doc_section(label = "Time and Date Functions"),
46+
description = r#"
47+
Calculates time intervals and returns the start of the interval nearest to the specified timestamp. Use `date_bin` to downsample time series data by grouping rows into time-based "bins" or "windows" and applying an aggregate or selector function to each window.
48+
49+
For example, if you "bin" or "window" data into 15 minute intervals, an input timestamp of `2023-01-01T18:18:18Z` will be updated to the start time of the 15 minute bin it is in: `2023-01-01T18:15:00Z`.
50+
"#,
51+
syntax_example = "date_bin(interval, expression, origin-timestamp)",
52+
sql_example = r#"```sql
53+
-- Bin the timestamp into 1 day intervals
54+
> SELECT date_bin(interval '1 day', time) as bin
55+
FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time);
56+
+---------------------+
57+
| bin |
58+
+---------------------+
59+
| 2023-01-01T00:00:00 |
60+
| 2023-01-03T00:00:00 |
61+
+---------------------+
62+
2 row(s) fetched.
63+
64+
-- Bin the timestamp into 1 day intervals starting at 3AM on 2023-01-01
65+
> SELECT date_bin(interval '1 day', time, '2023-01-01T03:00:00') as bin
66+
FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time);
67+
+---------------------+
68+
| bin |
69+
+---------------------+
70+
| 2023-01-01T03:00:00 |
71+
| 2023-01-03T03:00:00 |
72+
+---------------------+
73+
2 row(s) fetched.
74+
```"#,
75+
argument(name = "interval", description = "Bin interval."),
76+
argument(
77+
name = "expression",
78+
description = "Time expression to operate on. Can be a constant, column, or function."
79+
),
80+
argument(
81+
name = "origin-timestamp",
82+
description = "Optional. Starting point used to determine bin boundaries. If not specified, defaults to 1970-01-01T00:00:00Z (the UNIX epoch in UTC).
83+
84+
The following intervals are supported:
85+
86+
- nanoseconds
87+
- microseconds
88+
- milliseconds
89+
- seconds
90+
- minutes
91+
- hours
92+
- days
93+
- weeks
94+
- months
95+
- years
96+
- century
97+
"
98+
)
99+
)]
44100
#[derive(Debug)]
45101
pub struct DateBinFunc {
46102
signature: Signature,
@@ -169,68 +225,10 @@ impl ScalarUDFImpl for DateBinFunc {
169225
}
170226
}
171227
fn documentation(&self) -> Option<&Documentation> {
172-
Some(get_date_bin_doc())
228+
self.doc()
173229
}
174230
}
175231

176-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
177-
178-
fn get_date_bin_doc() -> &'static Documentation {
179-
DOCUMENTATION.get_or_init(|| {
180-
Documentation::builder(
181-
DOC_SECTION_DATETIME,
182-
r#"
183-
Calculates time intervals and returns the start of the interval nearest to the specified timestamp. Use `date_bin` to downsample time series data by grouping rows into time-based "bins" or "windows" and applying an aggregate or selector function to each window.
184-
185-
For example, if you "bin" or "window" data into 15 minute intervals, an input timestamp of `2023-01-01T18:18:18Z` will be updated to the start time of the 15 minute bin it is in: `2023-01-01T18:15:00Z`.
186-
"#,
187-
"date_bin(interval, expression, origin-timestamp)")
188-
.with_sql_example(r#"```sql
189-
-- Bin the timestamp into 1 day intervals
190-
> SELECT date_bin(interval '1 day', time) as bin
191-
FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time);
192-
+---------------------+
193-
| bin |
194-
+---------------------+
195-
| 2023-01-01T00:00:00 |
196-
| 2023-01-03T00:00:00 |
197-
+---------------------+
198-
2 row(s) fetched.
199-
200-
-- Bin the timestamp into 1 day intervals starting at 3AM on 2023-01-01
201-
> SELECT date_bin(interval '1 day', time, '2023-01-01T03:00:00') as bin
202-
FROM VALUES ('2023-01-01T18:18:18Z'), ('2023-01-03T19:00:03Z') t(time);
203-
+---------------------+
204-
| bin |
205-
+---------------------+
206-
| 2023-01-01T03:00:00 |
207-
| 2023-01-03T03:00:00 |
208-
+---------------------+
209-
2 row(s) fetched.
210-
```
211-
"#)
212-
.with_argument("interval", "Bin interval.")
213-
.with_argument("expression", "Time expression to operate on. Can be a constant, column, or function.")
214-
.with_argument("origin-timestamp", "Optional. Starting point used to determine bin boundaries. If not specified defaults 1970-01-01T00:00:00Z (the UNIX epoch in UTC).
215-
216-
The following intervals are supported:
217-
218-
- nanoseconds
219-
- microseconds
220-
- milliseconds
221-
- seconds
222-
- minutes
223-
- hours
224-
- days
225-
- weeks
226-
- months
227-
- years
228-
- century
229-
")
230-
.build()
231-
})
232-
}
233-
234232
enum Interval {
235233
Nanoseconds(i64),
236234
Months(i64),

datafusion/functions/src/datetime/date_part.rs

+35-42
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
use std::any::Any;
1919
use std::str::FromStr;
20-
use std::sync::{Arc, OnceLock};
20+
use std::sync::Arc;
2121

2222
use arrow::array::{Array, ArrayRef, Float64Array, Int32Array};
2323
use arrow::compute::kernels::cast_utils::IntervalUnit;
@@ -41,11 +41,41 @@ use datafusion_common::{
4141
ExprSchema, Result, ScalarValue,
4242
};
4343
use datafusion_expr::{
44-
scalar_doc_sections::DOC_SECTION_DATETIME, ColumnarValue, Documentation, Expr,
45-
ScalarUDFImpl, Signature, TypeSignature, Volatility,
44+
ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, TypeSignature,
45+
Volatility,
4646
};
4747
use datafusion_expr_common::signature::TypeSignatureClass;
48-
48+
use datafusion_macros::user_doc;
49+
50+
#[user_doc(
51+
doc_section(label = "Time and Date Functions"),
52+
description = "Returns the specified part of the date as an integer.",
53+
syntax_example = "date_part(part, expression)",
54+
alternative_syntax = "extract(field FROM source)",
55+
argument(
56+
name = "part",
57+
description = "Part of the date to return. The following date parts are supported:
58+
59+
- year
60+
- quarter (emits value in inclusive range [1, 4] based on which quartile of the year the date is in)
61+
- month
62+
- week (week of the year)
63+
- day (day of the month)
64+
- hour
65+
- minute
66+
- second
67+
- millisecond
68+
- microsecond
69+
- nanosecond
70+
- dow (day of the week)
71+
- doy (day of the year)
72+
- epoch (seconds since Unix epoch)
73+
"
74+
),
75+
argument(
76+
name = "expression",
77+
description = "Time expression to operate on. Can be a constant, column, or function."
78+
)
79+
)]
4979
#[derive(Debug)]
5080
pub struct DatePartFunc {
5181
signature: Signature,
@@ -190,7 +220,7 @@ impl ScalarUDFImpl for DatePartFunc {
190220
&self.aliases
191221
}
192222
fn documentation(&self) -> Option<&Documentation> {
193-
Some(get_date_part_doc())
223+
self.doc()
194224
}
195225
}
196226

@@ -206,43 +236,6 @@ fn part_normalization(part: &str) -> &str {
206236
.unwrap_or(part)
207237
}
208238

209-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
210-
211-
fn get_date_part_doc() -> &'static Documentation {
212-
DOCUMENTATION.get_or_init(|| {
213-
Documentation::builder(
214-
DOC_SECTION_DATETIME,
215-
"Returns the specified part of the date as an integer.",
216-
"date_part(part, expression)")
217-
.with_argument(
218-
"part",
219-
r#"Part of the date to return. The following date parts are supported:
220-
221-
- year
222-
- quarter (emits value in inclusive range [1, 4] based on which quartile of the year the date is in)
223-
- month
224-
- week (week of the year)
225-
- day (day of the month)
226-
- hour
227-
- minute
228-
- second
229-
- millisecond
230-
- microsecond
231-
- nanosecond
232-
- dow (day of the week)
233-
- doy (day of the year)
234-
- epoch (seconds since Unix epoch)
235-
"#,
236-
)
237-
.with_argument(
238-
"expression",
239-
"Time expression to operate on. Can be a constant, column, or function.",
240-
)
241-
.with_alternative_syntax("extract(field FROM source)")
242-
.build()
243-
})
244-
}
245-
246239
/// Invoke [`date_part`] on an `array` (e.g. Timestamp) and convert the
247240
/// result to a total number of seconds, milliseconds, microseconds or
248241
/// nanoseconds

0 commit comments

Comments
 (0)