Skip to content

Commit

Permalink
doc-gen: migrate scalar functions (string) documentation 2/4
Browse files Browse the repository at this point in the history
  • Loading branch information
Cheng-Yuan-Lai committed Dec 27, 2024
1 parent a08dc0a commit 1454bea
Show file tree
Hide file tree
Showing 8 changed files with 178 additions and 235 deletions.
51 changes: 22 additions & 29 deletions datafusion/functions/src/string/concat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use arrow::array::{as_largestring_array, Array};
use arrow::datatypes::DataType;
use datafusion_expr::sort_properties::ExprProperties;
use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use crate::string::concat;
use crate::strings::{
Expand All @@ -28,11 +28,30 @@ use crate::strings::{
use datafusion_common::cast::{as_string_array, as_string_view_array};
use datafusion_common::{internal_err, plan_err, Result, ScalarValue};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
use datafusion_expr::{lit, ColumnarValue, Documentation, Expr, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Concatenates multiple strings together.",
syntax_example = "concat(str[, ..., str_n])",
sql_example = r#"```sql
> select concat('data', 'f', 'us', 'ion');
+-------------------------------------------------------+
| concat(Utf8("data"),Utf8("f"),Utf8("us"),Utf8("ion")) |
+-------------------------------------------------------+
| datafusion |
+-------------------------------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(
name = "str_n",
description = "Subsequent string expressions to concatenate."
),
related_udf(name = "concat_ws")
)]
#[derive(Debug)]
pub struct ConcatFunc {
signature: Signature,
Expand Down Expand Up @@ -264,40 +283,14 @@ impl ScalarUDFImpl for ConcatFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_concat_doc())
self.doc()
}

fn preserves_lex_ordering(&self, _inputs: &[ExprProperties]) -> Result<bool> {
Ok(true)
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

fn get_concat_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_STRING,
"Concatenates multiple strings together.",
"concat(str[, ..., str_n])",
)
.with_sql_example(
r#"```sql
> select concat('data', 'f', 'us', 'ion');
+-------------------------------------------------------+
| concat(Utf8("data"),Utf8("f"),Utf8("us"),Utf8("ion")) |
+-------------------------------------------------------+
| datafusion |
+-------------------------------------------------------+
```"#,
)
.with_standard_argument("str", Some("String"))
.with_argument("str_n", "Subsequent string expressions to concatenate.")
.with_related_udf("concat_ws")
.build()
})
}

pub fn simplify_concat(args: Vec<Expr>) -> Result<ExprSimplifyResult> {
let mut new_args = Vec::with_capacity(args.len());
let mut contiguous_scalar = "".to_string();
Expand Down
59 changes: 26 additions & 33 deletions datafusion/functions/src/string/concat_ws.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

use arrow::array::{as_largestring_array, Array, StringArray};
use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::datatypes::DataType;

Expand All @@ -27,11 +27,34 @@ use crate::strings::{ColumnarValueRef, StringArrayBuilder};
use datafusion_common::cast::{as_string_array, as_string_view_array};
use datafusion_common::{exec_err, internal_err, plan_err, Result, ScalarValue};
use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
use datafusion_expr::{lit, ColumnarValue, Documentation, Expr, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Concatenates multiple strings together with a specified separator.",
syntax_example = "concat_ws(separator, str[, ..., str_n])",
sql_example = r#"```sql
> select concat_ws('_', 'data', 'fusion');
+--------------------------------------------------+
| concat_ws(Utf8("_"),Utf8("data"),Utf8("fusion")) |
+--------------------------------------------------+
| data_fusion |
+--------------------------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(
name = "separator",
description = "Separator to insert between concatenated strings."
),
argument(
name = "str_n",
description = "Subsequent string expressions to concatenate."
),
related_udf(name = "concat")
)]
#[derive(Debug)]
pub struct ConcatWsFunc {
signature: Signature,
Expand Down Expand Up @@ -271,40 +294,10 @@ impl ScalarUDFImpl for ConcatWsFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_concat_ws_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

fn get_concat_ws_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_STRING,
"Concatenates multiple strings together with a specified separator.",
"concat_ws(separator, str[, ..., str_n])",
)
.with_sql_example(
r#"```sql
> select concat_ws('_', 'data', 'fusion');
+--------------------------------------------------+
| concat_ws(Utf8("_"),Utf8("data"),Utf8("fusion")) |
+--------------------------------------------------+
| data_fusion |
+--------------------------------------------------+
```"#,
)
.with_argument(
"separator",
"Separator to insert between concatenated strings.",
)
.with_standard_argument("str", Some("String"))
.with_argument("str_n", "Subsequent string expressions to concatenate.")
.with_related_udf("concat")
.build()
})
}

fn simplify_concat_ws(delimiter: &Expr, args: &[Expr]) -> Result<ExprSimplifyResult> {
match delimiter {
Expr::Literal(
Expand Down
58 changes: 24 additions & 34 deletions datafusion/functions/src/string/ends_with.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,38 @@
// under the License.

use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::array::ArrayRef;
use arrow::datatypes::DataType;

use crate::utils::make_scalar_function;
use datafusion_common::{internal_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Tests if a string ends with a substring.",
syntax_example = "ends_with(str, substr)",
sql_example = r#"```sql
> select ends_with('datafusion', 'soin');
+--------------------------------------------+
| ends_with(Utf8("datafusion"),Utf8("soin")) |
+--------------------------------------------+
| false |
+--------------------------------------------+
> select ends_with('datafusion', 'sion');
+--------------------------------------------+
| ends_with(Utf8("datafusion"),Utf8("sion")) |
+--------------------------------------------+
| true |
+--------------------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(name = "substr", description = "Substring to test for.")
)]
#[derive(Debug)]
pub struct EndsWithFunc {
signature: Signature,
Expand Down Expand Up @@ -79,41 +100,10 @@ impl ScalarUDFImpl for EndsWithFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_ends_with_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

fn get_ends_with_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_STRING,
"Tests if a string ends with a substring.",
"ends_with(str, substr)",
)
.with_sql_example(
r#"```sql
> select ends_with('datafusion', 'soin');
+--------------------------------------------+
| ends_with(Utf8("datafusion"),Utf8("soin")) |
+--------------------------------------------+
| false |
+--------------------------------------------+
> select ends_with('datafusion', 'sion');
+--------------------------------------------+
| ends_with(Utf8("datafusion"),Utf8("sion")) |
+--------------------------------------------+
| true |
+--------------------------------------------+
```"#,
)
.with_standard_argument("str", Some("String"))
.with_argument("substr", "Substring to test for.")
.build()
})
}

/// Returns true if string ends with suffix.
/// ends_with('alphabet', 'abet') = true
pub fn ends_with(args: &[ArrayRef]) -> Result<ArrayRef> {
Expand Down
49 changes: 24 additions & 25 deletions datafusion/functions/src/string/levenshtein.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::array::{ArrayRef, Int32Array, Int64Array, OffsetSizeTrait};
use arrow::datatypes::DataType;
Expand All @@ -25,10 +25,31 @@ use crate::utils::{make_scalar_function, utf8_to_int_type};
use datafusion_common::cast::{as_generic_string_array, as_string_view_array};
use datafusion_common::utils::datafusion_strsim;
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Returns the [`Levenshtein distance`](https://en.wikipedia.org/wiki/Levenshtein_distance) between the two given strings.",
syntax_example = "levenshtein(str1, str2)",
sql_example = r#"```sql
> select levenshtein('kitten', 'sitting');
+---------------------------------------------+
| levenshtein(Utf8("kitten"),Utf8("sitting")) |
+---------------------------------------------+
| 3 |
+---------------------------------------------+
```"#,
argument(
name = "str1",
description = "String expression to compute Levenshtein distance with str2."
),
argument(
name = "str2",
description = "String expression to compute Levenshtein distance with str1."
)
)]
#[derive(Debug)]
pub struct LevenshteinFunc {
signature: Signature,
Expand Down Expand Up @@ -82,32 +103,10 @@ impl ScalarUDFImpl for LevenshteinFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_levenshtein_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

fn get_levenshtein_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_STRING,
"Returns the [`Levenshtein distance`](https://en.wikipedia.org/wiki/Levenshtein_distance) between the two given strings.",
"levenshtein(str1, str2)")
.with_sql_example(r#"```sql
> select levenshtein('kitten', 'sitting');
+---------------------------------------------+
| levenshtein(Utf8("kitten"),Utf8("sitting")) |
+---------------------------------------------+
| 3 |
+---------------------------------------------+
```"#)
.with_argument("str1", "String expression to compute Levenshtein distance with str2.")
.with_argument("str2", "String expression to compute Levenshtein distance with str1.")
.build()
})
}

/// Returns the Levenshtein distance between the two given strings.
/// LEVENSHTEIN('kitten', 'sitting') = 3
pub fn levenshtein<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
Expand Down
46 changes: 18 additions & 28 deletions datafusion/functions/src/string/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,30 @@

use arrow::datatypes::DataType;
use std::any::Any;
use std::sync::OnceLock;

use crate::string::common::to_lower;
use crate::utils::utf8_to_str_type;
use datafusion_common::Result;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Converts a string to lower-case.",
syntax_example = "lower(str)",
sql_example = r#"```sql
> select lower('Ångström');
+-------------------------+
| lower(Utf8("Ångström")) |
+-------------------------+
| ångström |
+-------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
related_udf(name = "initcap"),
related_udf(name = "upper")
)]
#[derive(Debug)]
pub struct LowerFunc {
signature: Signature,
Expand Down Expand Up @@ -71,35 +86,10 @@ impl ScalarUDFImpl for LowerFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_lower_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

fn get_lower_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_STRING,
"Converts a string to lower-case.",
"lower(str)",
)
.with_sql_example(
r#"```sql
> select lower('Ångström');
+-------------------------+
| lower(Utf8("Ångström")) |
+-------------------------+
| ångström |
+-------------------------+
```"#,
)
.with_standard_argument("str", Some("String"))
.with_related_udf("initcap")
.with_related_udf("upper")
.build()
})
}
#[cfg(test)]
mod tests {
use super::*;
Expand Down
Loading

0 comments on commit 1454bea

Please sign in to comment.