Skip to content

Commit

Permalink
doc-gen: migrate scalar functions (string) documentation 1/4
Browse files Browse the repository at this point in the history
  • Loading branch information
Cheng-Yuan-Lai committed Dec 27, 2024
1 parent a08dc0a commit f893f97
Show file tree
Hide file tree
Showing 8 changed files with 168 additions and 240 deletions.
58 changes: 24 additions & 34 deletions datafusion/functions/src/string/ascii.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,33 @@ use arrow::array::{ArrayAccessor, ArrayIter, ArrayRef, AsArray, Int32Array};
use arrow::datatypes::DataType;
use arrow::error::ArrowError;
use datafusion_common::{internal_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use datafusion_macros::user_doc;
use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Returns the Unicode character code of the first character in a string.",
syntax_example = "ascii(str)",
sql_example = r#"```sql
> select ascii('abc');
+--------------------+
| ascii(Utf8("abc")) |
+--------------------+
| 97 |
+--------------------+
> select ascii('🚀');
+-------------------+
| ascii(Utf8("🚀")) |
+-------------------+
| 128640 |
+-------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
related_udf(name = "chr")
)]
#[derive(Debug)]
pub struct AsciiFunc {
signature: Signature,
Expand Down Expand Up @@ -73,41 +94,10 @@ impl ScalarUDFImpl for AsciiFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_ascii_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

fn get_ascii_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_STRING,
"Returns the Unicode character code of the first character in a string.",
"ascii(str)",
)
.with_sql_example(
r#"```sql
> select ascii('abc');
+--------------------+
| ascii(Utf8("abc")) |
+--------------------+
| 97 |
+--------------------+
> select ascii('🚀');
+-------------------+
| ascii(Utf8("🚀")) |
+-------------------+
| 128640 |
+-------------------+
```"#,
)
.with_standard_argument("str", Some("String"))
.with_related_udf("chr")
.build()
})
}

fn calculate_ascii<'a, V>(array: V) -> Result<ArrayRef, ArrowError>
where
V: ArrayAccessor<Item = &'a str>,
Expand Down
47 changes: 18 additions & 29 deletions datafusion/functions/src/string/bit_length.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,29 @@
use arrow::compute::kernels::length::bit_length;
use arrow::datatypes::DataType;
use std::any::Any;
use std::sync::OnceLock;

use crate::utils::utf8_to_int_type;
use datafusion_common::{exec_err, Result, ScalarValue};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
use datafusion_macros::user_doc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Returns the bit length of a string.",
syntax_example = "bit_length(str)",
sql_example = r#"```sql
> select bit_length('datafusion');
+--------------------------------+
| bit_length(Utf8("datafusion")) |
+--------------------------------+
| 80 |
+--------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
related_udf(name = "length"),
related_udf(name = "octet_length")
)]
#[derive(Debug)]
pub struct BitLengthFunc {
signature: Signature,
Expand Down Expand Up @@ -92,32 +107,6 @@ impl ScalarUDFImpl for BitLengthFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_bit_length_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

fn get_bit_length_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_STRING,
"Returns the bit length of a string.",
"bit_length(str)",
)
.with_sql_example(
r#"```sql
> select bit_length('datafusion');
+--------------------------------+
| bit_length(Utf8("datafusion")) |
+--------------------------------+
| 80 |
+--------------------------------+
```"#,
)
.with_standard_argument("str", Some("String"))
.with_related_udf("length")
.with_related_udf("octet_length")
.build()
})
}
53 changes: 24 additions & 29 deletions datafusion/functions/src/string/btrim.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,11 @@ use arrow::array::{ArrayRef, OffsetSizeTrait};
use arrow::datatypes::DataType;
use datafusion_common::{exec_err, Result};
use datafusion_expr::function::Hint;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, Volatility,
};
use datafusion_macros::user_doc;
use std::any::Any;
use std::sync::OnceLock;

/// Returns the longest string with leading and trailing characters removed. If the characters are not specified, whitespace is removed.
/// btrim('xyxtrimyyx', 'xyz') = 'trim'
Expand All @@ -35,6 +34,28 @@ fn btrim<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
general_trim::<T>(args, TrimType::Both, use_string_view)
}

#[user_doc(
doc_section(label = "String Functions"),
description = "Trims the specified trim string from the start and end of a string. If no trim string is provided, all whitespace is removed from the start and end of the input string.",
syntax_example = "btrim(str[, trim_str])",
alternative_syntax = "trim(BOTH trim_str FROM str)",
alternative_syntax = "trim(trim_str FROM str)",
sql_example = r#"```sql
> select btrim('__datafusion____', '_');
+-------------------------------------------+
| btrim(Utf8("__datafusion____"),Utf8("_")) |
+-------------------------------------------+
| datafusion |
+-------------------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(
name = "trim_str",
description = "String expression to trim from the beginning and end of the input string. Can be a constant, column, or function, and any combination of operators. _Default is whitespace characters._"
),
related_udf(name = "ltrim"),
related_udf(name = "rtrim")
)]
#[derive(Debug)]
pub struct BTrimFunc {
signature: Signature,
Expand Down Expand Up @@ -106,36 +127,10 @@ impl ScalarUDFImpl for BTrimFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_btrim_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

fn get_btrim_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_STRING,
"Trims the specified trim string from the start and end of a string. If no trim string is provided, all whitespace is removed from the start and end of the input string.",
"btrim(str[, trim_str])")
.with_sql_example(r#"```sql
> select btrim('__datafusion____', '_');
+-------------------------------------------+
| btrim(Utf8("__datafusion____"),Utf8("_")) |
+-------------------------------------------+
| datafusion |
+-------------------------------------------+
```"#)
.with_standard_argument("str", Some("String"))
.with_argument("trim_str", "String expression to operate on. Can be a constant, column, or function, and any combination of operators. _Default is whitespace characters._")
.with_alternative_syntax("trim(BOTH trim_str FROM str)")
.with_alternative_syntax("trim(trim_str FROM str)")
.with_related_udf("ltrim")
.with_related_udf("rtrim")
.build()
})
}

#[cfg(test)]
mod tests {
use arrow::array::{Array, StringArray, StringViewArray};
Expand Down
46 changes: 18 additions & 28 deletions datafusion/functions/src/string/chr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

use arrow::array::ArrayRef;
use arrow::array::StringArray;
Expand All @@ -27,9 +27,9 @@ use arrow::datatypes::DataType::Utf8;
use crate::utils::make_scalar_function;
use datafusion_common::cast::as_int64_array;
use datafusion_common::{exec_err, Result};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{ColumnarValue, Documentation, Volatility};
use datafusion_expr::{ScalarUDFImpl, Signature};
use datafusion_macros::user_doc;

/// Returns the character with the given code. chr(0) is disallowed because text data types cannot store that character.
/// chr(65) = 'A'
Expand Down Expand Up @@ -60,6 +60,21 @@ pub fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
Ok(Arc::new(result) as ArrayRef)
}

#[user_doc(
doc_section(label = "String Functions"),
description = "Returns the character with the specified ASCII or Unicode code value.",
syntax_example = "chr(expression)",
sql_example = r#"```sql
> select chr(128640);
+--------------------+
| chr(Int64(128640)) |
+--------------------+
| 🚀 |
+--------------------+
```"#,
standard_argument(name = "expression", prefix = "String"),
related_udf(name = "ascii")
)]
#[derive(Debug)]
pub struct ChrFunc {
signature: Signature,
Expand Down Expand Up @@ -105,31 +120,6 @@ impl ScalarUDFImpl for ChrFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_chr_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

fn get_chr_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_STRING,
"Returns the character with the specified ASCII or Unicode code value.",
"chr(expression)",
)
.with_sql_example(
r#"```sql
> select chr(128640);
+--------------------+
| chr(Int64(128640)) |
+--------------------+
| 🚀 |
+--------------------+
```"#,
)
.with_standard_argument("expression", Some("String"))
.with_related_udf("ascii")
.build()
})
}
46 changes: 18 additions & 28 deletions datafusion/functions/src/string/contains.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,28 @@ use arrow::datatypes::DataType::{Boolean, LargeUtf8, Utf8, Utf8View};
use datafusion_common::exec_err;
use datafusion_common::DataFusionError;
use datafusion_common::Result;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_STRING;
use datafusion_expr::{
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_macros::user_doc;
use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

#[user_doc(
doc_section(label = "String Functions"),
description = "Return true if search_str is found within string (case-sensitive).",
syntax_example = "contains(str, search_str)",
sql_example = r#"```sql
> select contains('the quick brown fox', 'row');
+---------------------------------------------------+
| contains(Utf8("the quick brown fox"),Utf8("row")) |
+---------------------------------------------------+
| true |
+---------------------------------------------------+
```"#,
standard_argument(name = "str", prefix = "String"),
argument(name = "search_str", description = "The string to search for in str.")
)]
#[derive(Debug)]
pub struct ContainsFunc {
signature: Signature,
Expand Down Expand Up @@ -75,35 +90,10 @@ impl ScalarUDFImpl for ContainsFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_contains_doc())
self.doc()
}
}

static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

fn get_contains_doc() -> &'static Documentation {
DOCUMENTATION.get_or_init(|| {
Documentation::builder(
DOC_SECTION_STRING,
"Return true if search_str is found within string (case-sensitive).",
"contains(str, search_str)",
)
.with_sql_example(
r#"```sql
> select contains('the quick brown fox', 'row');
+---------------------------------------------------+
| contains(Utf8("the quick brown fox"),Utf8("row")) |
+---------------------------------------------------+
| true |
+---------------------------------------------------+
```"#,
)
.with_standard_argument("str", Some("String"))
.with_argument("search_str", "The string to search for in str.")
.build()
})
}

/// use `arrow::compute::contains` to do the calculation for contains
pub fn contains(args: &[ArrayRef]) -> Result<ArrayRef, DataFusionError> {
match (args[0].data_type(), args[1].data_type()) {
Expand Down
Loading

0 comments on commit f893f97

Please sign in to comment.