Skip to content

Commit f0a1c08

Browse files
author
Cheng-Yuan-Lai
committed
doc-gen: migrate scalar functions (encoding & regex) documentation
1 parent b9cef8c commit f0a1c08

File tree

5 files changed

+167
-181
lines changed

5 files changed

+167
-181
lines changed

datafusion/functions/src/encoding/inner.rs

+29-36
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,27 @@ use datafusion_common::{
3232
use datafusion_common::{exec_err, ScalarValue};
3333
use datafusion_common::{DataFusionError, Result};
3434
use datafusion_expr::{ColumnarValue, Documentation};
35-
use std::sync::{Arc, OnceLock};
35+
use std::sync::Arc;
3636
use std::{fmt, str::FromStr};
3737

38-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_BINARY_STRING;
3938
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
39+
use datafusion_macros::user_doc;
4040
use std::any::Any;
4141

42+
#[user_doc(
43+
doc_section(label = "Binary String Functions"),
44+
description = "Encode binary data into a textual representation.",
45+
syntax_example = "encode(expression, format)",
46+
argument(
47+
name = "expression",
48+
description = "Expression containing string or binary data"
49+
),
50+
argument(
51+
name = "format",
52+
description = "Supported formats are: `base64`, `hex`"
53+
),
54+
related_udf(name = "decode")
55+
)]
4256
#[derive(Debug)]
4357
pub struct EncodeFunc {
4458
signature: Signature,
@@ -58,22 +72,6 @@ impl EncodeFunc {
5872
}
5973
}
6074

61-
static ENCODE_DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
62-
63-
fn get_encode_doc() -> &'static Documentation {
64-
ENCODE_DOCUMENTATION.get_or_init(|| {
65-
Documentation::builder(
66-
DOC_SECTION_BINARY_STRING,
67-
"Encode binary data into a textual representation.",
68-
"encode(expression, format)",
69-
)
70-
.with_argument("expression", "Expression containing string or binary data")
71-
.with_argument("format", "Supported formats are: `base64`, `hex`")
72-
.with_related_udf("decode")
73-
.build()
74-
})
75-
}
76-
7775
impl ScalarUDFImpl for EncodeFunc {
7876
fn as_any(&self) -> &dyn Any {
7977
self
@@ -126,10 +124,21 @@ impl ScalarUDFImpl for EncodeFunc {
126124
}
127125

128126
fn documentation(&self) -> Option<&Documentation> {
129-
Some(get_encode_doc())
127+
self.doc()
130128
}
131129
}
132130

131+
#[user_doc(
132+
doc_section(label = "Binary String Functions"),
133+
description = "Decode binary data from textual representation in string.",
134+
syntax_example = "decode(expression, format)",
135+
argument(
136+
name = "expression",
137+
description = "Expression containing encoded string data"
138+
),
139+
argument(name = "format", description = "Same arguments as [encode](#encode)"),
140+
related_udf(name = "encode")
141+
)]
133142
#[derive(Debug)]
134143
pub struct DecodeFunc {
135144
signature: Signature,
@@ -149,22 +158,6 @@ impl DecodeFunc {
149158
}
150159
}
151160

152-
static DECODE_DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
153-
154-
fn get_decode_doc() -> &'static Documentation {
155-
DECODE_DOCUMENTATION.get_or_init(|| {
156-
Documentation::builder(
157-
DOC_SECTION_BINARY_STRING,
158-
"Decode binary data from textual representation in string.",
159-
"decode(expression, format)",
160-
)
161-
.with_argument("expression", "Expression containing encoded string data")
162-
.with_argument("format", "Same arguments as [encode](#encode)")
163-
.with_related_udf("encode")
164-
.build()
165-
})
166-
}
167-
168161
impl ScalarUDFImpl for DecodeFunc {
169162
fn as_any(&self) -> &dyn Any {
170163
self
@@ -217,7 +210,7 @@ impl ScalarUDFImpl for DecodeFunc {
217210
}
218211

219212
fn documentation(&self) -> Option<&Documentation> {
220-
Some(get_decode_doc())
213+
self.doc()
221214
}
222215
}
223216

datafusion/functions/src/regex/regexpcount.rs

+31-33
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,45 @@ use arrow::datatypes::{
2323
};
2424
use arrow::error::ArrowError;
2525
use datafusion_common::{exec_err, internal_err, Result, ScalarValue};
26-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_REGEX;
2726
use datafusion_expr::{
2827
ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature::Exact,
2928
TypeSignature::Uniform, Volatility,
3029
};
30+
use datafusion_macros::user_doc;
3131
use itertools::izip;
3232
use regex::Regex;
3333
use std::collections::hash_map::Entry;
3434
use std::collections::HashMap;
35-
use std::sync::{Arc, OnceLock};
35+
use std::sync::Arc;
3636

37+
#[user_doc(
38+
doc_section(label = "Regular Expression Functions"),
39+
description = "Returns the number of matches that a [regular expression](https://docs.rs/regex/latest/regex/#syntax) has in a string.",
40+
syntax_example = "regexp_count(str, regexp[, start, flags])",
41+
sql_example = r#"```sql
42+
> select regexp_count('abcAbAbc', 'abc', 2, 'i');
43+
+---------------------------------------------------------------+
44+
| regexp_count(Utf8("abcAbAbc"),Utf8("abc"),Int64(2),Utf8("i")) |
45+
+---------------------------------------------------------------+
46+
| 1 |
47+
+---------------------------------------------------------------+
48+
```"#,
49+
standard_argument(name = "str", prefix = "String"),
50+
standard_argument(name = "regexp", prefix = "Regular"),
51+
argument(
52+
name = "start",
53+
description = "- **start**: Optional start position (the first position is 1) to search for the regular expression. Can be a constant, column, or function."
54+
),
55+
argument(
56+
name = "flags",
57+
description = r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported:
58+
- **i**: case-insensitive: letters match both upper and lower case
59+
- **m**: multi-line mode: ^ and $ match begin/end of line
60+
- **s**: allow . to match \n
61+
- **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used
62+
- **U**: swap the meaning of x* and x*?"#
63+
)
64+
)]
3765
#[derive(Debug)]
3866
pub struct RegexpCountFunc {
3967
signature: Signature,
@@ -111,40 +139,10 @@ impl ScalarUDFImpl for RegexpCountFunc {
111139
}
112140

113141
fn documentation(&self) -> Option<&Documentation> {
114-
Some(get_regexp_count_doc())
142+
self.doc()
115143
}
116144
}
117145

118-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
119-
120-
fn get_regexp_count_doc() -> &'static Documentation {
121-
DOCUMENTATION.get_or_init(|| {
122-
Documentation::builder(
123-
DOC_SECTION_REGEX,
124-
"Returns the number of matches that a [regular expression](https://docs.rs/regex/latest/regex/#syntax) has in a string.",
125-
"regexp_count(str, regexp[, start, flags])")
126-
.with_sql_example(r#"```sql
127-
> select regexp_count('abcAbAbc', 'abc', 2, 'i');
128-
+---------------------------------------------------------------+
129-
| regexp_count(Utf8("abcAbAbc"),Utf8("abc"),Int64(2),Utf8("i")) |
130-
+---------------------------------------------------------------+
131-
| 1 |
132-
+---------------------------------------------------------------+
133-
```"#)
134-
.with_standard_argument("str", Some("String"))
135-
.with_standard_argument("regexp",Some("Regular"))
136-
.with_argument("start", "- **start**: Optional start position (the first position is 1) to search for the regular expression. Can be a constant, column, or function.")
137-
.with_argument("flags",
138-
r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported:
139-
- **i**: case-insensitive: letters match both upper and lower case
140-
- **m**: multi-line mode: ^ and $ match begin/end of line
141-
- **s**: allow . to match \n
142-
- **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used
143-
- **U**: swap the meaning of x* and x*?"#)
144-
.build()
145-
})
146-
}
147-
148146
pub fn regexp_count_func(args: &[ArrayRef]) -> Result<ArrayRef> {
149147
let args_len = args.len();
150148
if !(2..=4).contains(&args_len) {

datafusion/functions/src/regex/regexplike.rs

+30-28
Original file line numberDiff line numberDiff line change
@@ -25,30 +25,18 @@ use datafusion_common::exec_err;
2525
use datafusion_common::ScalarValue;
2626
use datafusion_common::{arrow_datafusion_err, plan_err};
2727
use datafusion_common::{internal_err, DataFusionError, Result};
28-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_REGEX;
2928
use datafusion_expr::{ColumnarValue, Documentation, TypeSignature};
3029
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
30+
use datafusion_macros::user_doc;
3131

3232
use std::any::Any;
33-
use std::sync::{Arc, OnceLock};
33+
use std::sync::Arc;
3434

35-
#[derive(Debug)]
36-
pub struct RegexpLikeFunc {
37-
signature: Signature,
38-
}
39-
40-
impl Default for RegexpLikeFunc {
41-
fn default() -> Self {
42-
Self::new()
43-
}
44-
}
45-
46-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
47-
48-
fn get_regexp_like_doc() -> &'static Documentation {
49-
DOCUMENTATION.get_or_init(|| {
50-
Documentation::builder(DOC_SECTION_REGEX,"Returns true if a [regular expression](https://docs.rs/regex/latest/regex/#syntax) has at least one match in a string, false otherwise.","regexp_like(str, regexp[, flags])")
51-
.with_sql_example(r#"```sql
35+
#[user_doc(
36+
doc_section(label = "Regular Expression Functions"),
37+
description = "Returns true if a [regular expression](https://docs.rs/regex/latest/regex/#syntax) has at least one match in a string, false otherwise.",
38+
syntax_example = "regexp_like(str, regexp[, flags])",
39+
sql_example = r#"```sql
5240
select regexp_like('Köln', '[a-zA-Z]ö[a-zA-Z]{2}');
5341
+--------------------------------------------------------+
5442
| regexp_like(Utf8("Köln"),Utf8("[a-zA-Z]ö[a-zA-Z]{2}")) |
@@ -63,18 +51,32 @@ SELECT regexp_like('aBc', '(b|d)', 'i');
6351
+--------------------------------------------------+
6452
```
6553
Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs)
66-
"#)
67-
.with_standard_argument("str", Some("String"))
68-
.with_standard_argument("regexp", Some("Regular"))
69-
.with_argument("flags",
70-
r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported:
54+
"#,
55+
standard_argument(name = "str", prefix = "String"),
56+
standard_argument(name = "regexp", prefix = "Regular"),
57+
argument(
58+
name = "start",
59+
description = "- **start**: Optional start position (the first position is 1) to search for the regular expression. Can be a constant, column, or function."
60+
),
61+
argument(
62+
name = "flags",
63+
description = r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported:
7164
- **i**: case-insensitive: letters match both upper and lower case
7265
- **m**: multi-line mode: ^ and $ match begin/end of line
7366
- **s**: allow . to match \n
7467
- **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used
75-
- **U**: swap the meaning of x* and x*?"#)
76-
.build()
77-
})
68+
- **U**: swap the meaning of x* and x*?"#
69+
)
70+
)]
71+
#[derive(Debug)]
72+
pub struct RegexpLikeFunc {
73+
signature: Signature,
74+
}
75+
76+
impl Default for RegexpLikeFunc {
77+
fn default() -> Self {
78+
Self::new()
79+
}
7880
}
7981

8082
impl RegexpLikeFunc {
@@ -142,7 +144,7 @@ impl ScalarUDFImpl for RegexpLikeFunc {
142144
}
143145

144146
fn documentation(&self) -> Option<&Documentation> {
145-
Some(get_regexp_like_doc())
147+
self.doc()
146148
}
147149
}
148150

datafusion/functions/src/regex/regexpmatch.rs

+38-41
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,47 @@ use datafusion_common::{arrow_datafusion_err, plan_err};
2626
use datafusion_common::{
2727
cast::as_generic_string_array, internal_err, DataFusionError, Result,
2828
};
29-
use datafusion_expr::scalar_doc_sections::DOC_SECTION_REGEX;
3029
use datafusion_expr::{ColumnarValue, Documentation, TypeSignature};
3130
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
31+
use datafusion_macros::user_doc;
3232
use std::any::Any;
33-
use std::sync::{Arc, OnceLock};
33+
use std::sync::Arc;
3434

35+
#[user_doc(
36+
doc_section(label = "Regular Expression Functions"),
37+
description = "Returns the first [regular expression](https://docs.rs/regex/latest/regex/#syntax) matches in a string.",
38+
syntax_example = "regexp_match(str, regexp[, flags])",
39+
sql_example = r#"```sql
40+
> select regexp_match('Köln', '[a-zA-Z]ö[a-zA-Z]{2}');
41+
+---------------------------------------------------------+
42+
| regexp_match(Utf8("Köln"),Utf8("[a-zA-Z]ö[a-zA-Z]{2}")) |
43+
+---------------------------------------------------------+
44+
| [Köln] |
45+
+---------------------------------------------------------+
46+
SELECT regexp_match('aBc', '(b|d)', 'i');
47+
+---------------------------------------------------+
48+
| regexp_match(Utf8("aBc"),Utf8("(b|d)"),Utf8("i")) |
49+
+---------------------------------------------------+
50+
| [B] |
51+
+---------------------------------------------------+
52+
```
53+
Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs)
54+
"#,
55+
standard_argument(name = "str", prefix = "String"),
56+
argument(
57+
name = "regexp",
58+
description = "Regular expression to match against. Can be a constant, column, or function."
59+
),
60+
argument(
61+
name = "flags",
62+
description = r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported:
63+
- **i**: case-insensitive: letters match both upper and lower case
64+
- **m**: multi-line mode: ^ and $ match begin/end of line
65+
- **s**: allow . to match \n
66+
- **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used
67+
- **U**: swap the meaning of x* and x*?"#
68+
)
69+
)]
3570
#[derive(Debug)]
3671
pub struct RegexpMatchFunc {
3772
signature: Signature,
@@ -113,48 +148,10 @@ impl ScalarUDFImpl for RegexpMatchFunc {
113148
}
114149

115150
fn documentation(&self) -> Option<&Documentation> {
116-
Some(get_regexp_match_doc())
151+
self.doc()
117152
}
118153
}
119154

120-
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
121-
122-
fn get_regexp_match_doc() -> &'static Documentation {
123-
DOCUMENTATION.get_or_init(|| {
124-
Documentation::builder(
125-
DOC_SECTION_REGEX,
126-
"Returns the first [regular expression](https://docs.rs/regex/latest/regex/#syntax) matches in a string.",
127-
"regexp_match(str, regexp[, flags])")
128-
.with_sql_example(r#"```sql
129-
> select regexp_match('Köln', '[a-zA-Z]ö[a-zA-Z]{2}');
130-
+---------------------------------------------------------+
131-
| regexp_match(Utf8("Köln"),Utf8("[a-zA-Z]ö[a-zA-Z]{2}")) |
132-
+---------------------------------------------------------+
133-
| [Köln] |
134-
+---------------------------------------------------------+
135-
SELECT regexp_match('aBc', '(b|d)', 'i');
136-
+---------------------------------------------------+
137-
| regexp_match(Utf8("aBc"),Utf8("(b|d)"),Utf8("i")) |
138-
+---------------------------------------------------+
139-
| [B] |
140-
+---------------------------------------------------+
141-
```
142-
Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs)
143-
"#)
144-
.with_standard_argument("str", Some("String"))
145-
.with_argument("regexp","Regular expression to match against.
146-
Can be a constant, column, or function.")
147-
.with_argument("flags",
148-
r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported:
149-
- **i**: case-insensitive: letters match both upper and lower case
150-
- **m**: multi-line mode: ^ and $ match begin/end of line
151-
- **s**: allow . to match \n
152-
- **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used
153-
- **U**: swap the meaning of x* and x*?"#)
154-
.build()
155-
})
156-
}
157-
158155
fn regexp_match_func(args: &[ArrayRef]) -> Result<ArrayRef> {
159156
match args[0].data_type() {
160157
DataType::Utf8 => regexp_match::<i32>(args),

0 commit comments

Comments
 (0)