From d7b98e3a9b6497d995ab76a0da84e1444ae3f875 Mon Sep 17 00:00:00 2001 From: Dadepo Aderemi Date: Sun, 19 Nov 2023 10:14:20 +0400 Subject: [PATCH] Switch to one crate --- Cargo.lock | 19 +------- Cargo.toml | 18 ++++---- README.md | 12 +++++- common/Cargo.toml | 8 ---- df_extras_postgres/.gitignore | 4 -- df_extras_postgres/Cargo.toml | 14 ------ df_extras_sqlite/.gitignore | 4 -- df_extras_sqlite/Cargo.toml | 15 ------- df_extras_sqlite/README.md | 43 ------------------- common/src/lib.rs => src/common/mod.rs | 0 {common/src => src/common}/test_utils.rs | 0 src/lib.rs | 3 ++ .../src => src/postgres}/math_udfs.rs | 7 ++- .../src/lib.rs => src/postgres/mod.rs | 13 +++--- .../src => src/postgres}/network_udfs.rs | 8 ++-- .../src => src/sqlite}/json_udfs.rs | 8 ++-- .../src/lib.rs => src/sqlite/mod.rs | 4 +- .../README.md => supports/postgres.md | 0 supports/sqlite.md | 0 19 files changed, 40 insertions(+), 140 deletions(-) delete mode 100644 common/Cargo.toml delete mode 100644 df_extras_postgres/.gitignore delete mode 100644 df_extras_postgres/Cargo.toml delete mode 100644 df_extras_sqlite/.gitignore delete mode 100644 df_extras_sqlite/Cargo.toml delete mode 100644 df_extras_sqlite/README.md rename common/src/lib.rs => src/common/mod.rs (100%) rename {common/src => src/common}/test_utils.rs (100%) create mode 100644 src/lib.rs rename {df_extras_postgres/src => src/postgres}/math_udfs.rs (95%) rename df_extras_postgres/src/lib.rs => src/postgres/mod.rs (97%) rename {df_extras_postgres/src => src/postgres}/network_udfs.rs (99%) rename {df_extras_sqlite/src => src/sqlite}/json_udfs.rs (97%) rename df_extras_sqlite/src/lib.rs => src/sqlite/mod.rs (90%) rename df_extras_postgres/README.md => supports/postgres.md (100%) create mode 100644 supports/sqlite.md diff --git a/Cargo.lock b/Cargo.lock index 604ea2a..515e402 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -491,13 +491,6 @@ dependencies = [ "unicode-width", ] -[[package]] -name = "common" -version = "0.1.0" -dependencies = [ - "datafusion", -] - [[package]] name = "const-random" version = "0.1.15" @@ -801,21 +794,11 @@ dependencies = [ ] [[package]] -name = "df_extras_postgres" +name = "df_extras" version = "0.1.0" dependencies = [ - "common", "datafusion", "ipnet", - "tokio", -] - -[[package]] -name = "df_extras_sqlite" -version = "0.1.0" -dependencies = [ - "common", - "datafusion", "serde", "serde_json", "tokio", diff --git a/Cargo.toml b/Cargo.toml index 0ce8ab8..a086584 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,15 +1,15 @@ -[workspace] -resolver = "2" +[package] +name = "df_extras" +version = "0.1.0" +edition = "2021" -members = [ - "common", - "df_extras_postgres", - "df_extras_sqlite", -] - -[workspace.dependencies] +[dependencies] datafusion = "32.0.0" ipnet = "2.7.2" serde = "1.0.192" serde_json = { version = "1.0.108", features = ["preserve_order"] } tokio = { version = "1.25.0", features = ["macros", "rt", "parking_lot"] } + +[features] +sqlite = [] +postgres = [] \ No newline at end of file diff --git a/README.md b/README.md index 38ddb30..f773748 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,14 @@ A collection of user defined functions, from your favourite databases, in DataFusion. -## Crates +## Road to 0.0.1 -[df_extras_postgres](./df_postgres/README.md) \ No newline at end of file +| **Postgres** | **[Details](supports/postgres.md)** | +|-----------------|-------------------------------------| +| Networking | ✅︎ Done | +| Maths | 🚧︎ Ongoing | +| JSON | ⭘ Not Started | +| **Sqlite** | **[Details](supports/sqlite.md)** | +| JSON | 🚧︎ Ongoing | +| Built-In Scalar | ⭘ Not Started | +| Maths | ⭘ Not Started | diff --git a/common/Cargo.toml b/common/Cargo.toml deleted file mode 100644 index 043a329..0000000 --- a/common/Cargo.toml +++ /dev/null @@ -1,8 +0,0 @@ -[package] -name = "common" -version = "0.1.0" -edition = "2021" -publish = false - -[dependencies] -datafusion = { workspace = true } diff --git a/df_extras_postgres/.gitignore b/df_extras_postgres/.gitignore deleted file mode 100644 index 9bb92e8..0000000 --- a/df_extras_postgres/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -**/target/ -**/.idea/ -**/.DS_Store -/Cargo.lock diff --git a/df_extras_postgres/Cargo.toml b/df_extras_postgres/Cargo.toml deleted file mode 100644 index 7a73290..0000000 --- a/df_extras_postgres/Cargo.toml +++ /dev/null @@ -1,14 +0,0 @@ -[package] -name = "df_extras_postgres" -version = "0.1.0" -edition = "2021" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -common = { path = "../common" } -datafusion = { workspace = true } -ipnet = { workspace = true } -tokio = { workspace = true } - -[features] -postgres = [] diff --git a/df_extras_sqlite/.gitignore b/df_extras_sqlite/.gitignore deleted file mode 100644 index 9bb92e8..0000000 --- a/df_extras_sqlite/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -**/target/ -**/.idea/ -**/.DS_Store -/Cargo.lock diff --git a/df_extras_sqlite/Cargo.toml b/df_extras_sqlite/Cargo.toml deleted file mode 100644 index 54b631b..0000000 --- a/df_extras_sqlite/Cargo.toml +++ /dev/null @@ -1,15 +0,0 @@ -[package] -name = "df_extras_sqlite" -version = "0.1.0" -edition = "2021" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -common = { path = "../common" } -datafusion = { workspace = true } -serde = { workspace = true } -serde_json = { workspace = true } -tokio = { workspace = true } - -[features] -sqlite = [] diff --git a/df_extras_sqlite/README.md b/df_extras_sqlite/README.md deleted file mode 100644 index d74e82c..0000000 --- a/df_extras_sqlite/README.md +++ /dev/null @@ -1,43 +0,0 @@ -### Network Address Functions -https://www.postgresql.org/docs/16/functions-net.html - -| Implemented | Function | Return Type | Description | Example | Result | -|-------------|-------------------------|-------------|-----------------------------------------------------------|------------------------------------------------------|-----------------| -| ❓ | abbrev(inet) | text | abbreviated display format as text | abbrev(inet '10.1.0.0/16') | 10.1.0.0/16 | -| ❓ | abbrev(cidr) | text | abbreviated display format as text | abbrev(cidr '10.1.0.0/16') | 10.1/16 | -| ✔ | broadcast(inet) | inet | broadcast address for network | broadcast('192.168.1.5/24') | 192.168.1.255/24| -| ✔ | family(inet) | int | extract family of address; 4 for IPv4, 6 for IPv6 | family('::1') | 6 | -| ✔ | host(inet) | text | extract IP address as text | host('192.168.1.5/24') | 192.168.1.5 | -| ✔ | hostmask(inet) | inet | construct host mask for network | hostmask('192.168.23.20/30') | 0.0.0.3 | -| ✔ | masklen(inet) | int | extract netmask length | masklen('192.168.1.5/24') | 24 | -| ✔ | netmask(inet) | inet | construct netmask for network | netmask('192.168.1.5/24') | 255.255.255.0 | -| ✔ | network(inet) | cidr | extract network part of address | network('192.168.1.5/24') | 192.168.1.0/24 | -| ✔ | set_masklen(inet, int) | inet | set netmask length for inet value | set_masklen('192.168.1.5/24', 16) | 192.168.1.5/16 | -| ✔ | set_masklen(cidr, int) | cidr | set netmask length for cidr value | set_masklen('192.168.1.0/24'::cidr, 16) | 192.168.0.0/16 | -| ❓ | text(inet) | text | extract IP address and netmask length as text | text(inet '192.168.1.5') | 192.168.1.5/32 | -| ✔ | inet_same_family(inet, inet) | boolean | are the addresses from the same family? | inet_same_family('192.168.1.5/24', '::1') | false | -| ✔ | inet_merge(inet, inet) | cidr | the smallest network which includes both of the given networks | inet_merge('192.168.1.5/24', '192.168.2.5/24') | - -### Mathematics Functions -https://www.postgresql.org/docs/16/functions-math.html - -| Implemented | Function | Description | Example | -|-------------|--------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------| -| ✔ | ceiling ( numeric ) → numeric | Nearest integer greater than or equal to argument (same as ceil) | ceiling(95.3) → 96 | -| ✔ | div ( y numeric, x numeric ) → numeric | Integer quotient of y/x (truncates towards zero) | div(9, 4) → 2 | -| ❓ | erf ( double precision ) → double precision | Error function | erf(1.0) → 0.8427007929497149 | -| ❓ | erfc ( double precision ) → double precision | Complementary error function (1 - erf(x), without loss of precision for large inputs) | erfc(1.0) → 0.15729920705028513 | -| ❓ | min_scale ( numeric ) → integer | Minimum scale (number of fractional decimal digits) needed to represent the supplied value precisely | min_scale(8.4100) → 2 | -| ❓ | mod ( y numeric_type, x numeric_type ) → numeric_type | Remainder of y/x; available for smallint, integer, bigint, and numeric | mod(9, 4) → 1 | -| ❓ | scale ( numeric ) → integer | Scale of the argument (the number of decimal digits in the fractional part) | scale(8.4100) → 4 | -| ❓ | sign ( numeric ) → numeric | Sign of the argument (-1, 0, or +1) | sign(-8.4) → -1 | -| ❓ | trim_scale ( numeric ) → numeric | Reduces the value's scale (number of fractional decimal digits) by removing trailing zeroes | trim_scale(8.4100) → 8.41 | -| ❓ | width_bucket ( operand numeric, low numeric, high numeric, count integer ) → integer | Returns the number of the bucket in which operand falls in a histogram having count equal-width buckets spanning the range low to high. Returns 0 or count+1 for an input outside that range. | width_bucket(5.35, 0.024, 10.06, 5) → 3 | -| ❓ | random_normal ( [ mean double precision [, stddev double precision ]] ) → double precision | Returns a random value from the normal distribution with the given parameters; mean defaults to 0.0 and stddev defaults to 1.0 | random_normal(0.0, 1.0) → 0.051285419 | -| ❓ | acosd ( double precision ) → double precision | Inverse cosine, result in degrees | acosd(0.5) → 60 | -| ❓ | asind ( double precision ) → double precision | Inverse sine, result in degrees | asind(0.5) → 30 | -| ❓ | atand ( double precision ) → double precision | Inverse tangent, result in degrees | atand(1) → 45 | -| ❓ | cosd ( double precision ) → double precision | Cosine, argument in degrees | cosd(60) → 0.5 | -| ❓ | cotd ( double precision ) → double precision | Cotangent, argument in degrees | cotd(45) → 1 | -| ❓ | sind ( double precision ) → double precision | Sine, argument in degrees | sind(30) → 0.5 | -| ❓ | tand ( double precision ) → double precision | Tangent, argument in degrees | tand(45) → 1 | diff --git a/common/src/lib.rs b/src/common/mod.rs similarity index 100% rename from common/src/lib.rs rename to src/common/mod.rs diff --git a/common/src/test_utils.rs b/src/common/test_utils.rs similarity index 100% rename from common/src/test_utils.rs rename to src/common/test_utils.rs diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..171a9d0 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,3 @@ +pub mod common; +pub mod postgres; +pub mod sqlite; diff --git a/df_extras_postgres/src/math_udfs.rs b/src/postgres/math_udfs.rs similarity index 95% rename from df_extras_postgres/src/math_udfs.rs rename to src/postgres/math_udfs.rs index db48ad4..e885298 100644 --- a/df_extras_postgres/src/math_udfs.rs +++ b/src/postgres/math_udfs.rs @@ -39,12 +39,11 @@ pub fn div(args: &[ArrayRef]) -> Result { #[cfg(feature = "postgres")] #[cfg(test)] mod tests { - use common::test_utils::set_up_network_data_test; + use crate::common::test_utils::set_up_network_data_test; + use crate::postgres::register_postgres_udfs; use datafusion::assert_batches_sorted_eq; use datafusion::prelude::SessionContext; - use crate::register_udfs; - use super::*; #[tokio::test] @@ -101,7 +100,7 @@ mod tests { fn register_udfs_for_test() -> Result { let ctx = set_up_network_data_test()?; - register_udfs(&ctx)?; + register_postgres_udfs(&ctx)?; Ok(ctx) } } diff --git a/df_extras_postgres/src/lib.rs b/src/postgres/mod.rs similarity index 97% rename from df_extras_postgres/src/lib.rs rename to src/postgres/mod.rs index 2f6204a..d9cf7a5 100644 --- a/df_extras_postgres/src/lib.rs +++ b/src/postgres/mod.rs @@ -2,22 +2,21 @@ use std::sync::Arc; +use crate::postgres::math_udfs::{ceiling, div}; +use crate::postgres::network_udfs::{ + broadcast, family, host, hostmask, inet_merge, inet_same_family, masklen, netmask, network, + set_masklen, +}; use datafusion::arrow::datatypes::DataType::{Boolean, Float64, Int64, UInt8, Utf8}; use datafusion::error::Result; use datafusion::logical_expr::{ReturnTypeFunction, ScalarUDF, Signature, Volatility}; use datafusion::physical_expr::functions::make_scalar_function; use datafusion::prelude::SessionContext; -use crate::math_udfs::{ceiling, div}; -use crate::network_udfs::{ - broadcast, family, host, hostmask, inet_merge, inet_same_family, masklen, netmask, network, - set_masklen, -}; - mod math_udfs; mod network_udfs; -pub fn register_udfs(ctx: &SessionContext) -> Result<()> { +pub fn register_postgres_udfs(ctx: &SessionContext) -> Result<()> { register_network_udfs(ctx)?; register_math_udfs(ctx)?; Ok(()) diff --git a/df_extras_postgres/src/network_udfs.rs b/src/postgres/network_udfs.rs similarity index 99% rename from df_extras_postgres/src/network_udfs.rs rename to src/postgres/network_udfs.rs index 104004c..9eff013 100644 --- a/df_extras_postgres/src/network_udfs.rs +++ b/src/postgres/network_udfs.rs @@ -420,13 +420,11 @@ fn bit_in_common(l: &[u8], r: &[u8], n: usize) -> usize { #[cfg(feature = "postgres")] #[cfg(test)] mod tests { + use crate::common::test_utils::set_up_network_data_test; + use crate::postgres::register_postgres_udfs; use datafusion::assert_batches_sorted_eq; use datafusion::prelude::SessionContext; - use common::test_utils::set_up_network_data_test; - - use crate::register_udfs; - use super::*; #[tokio::test] @@ -923,7 +921,7 @@ mod tests { fn register_udfs_for_test() -> Result { let ctx = set_up_network_data_test()?; - register_udfs(&ctx)?; + register_postgres_udfs(&ctx)?; Ok(ctx) } } diff --git a/df_extras_sqlite/src/json_udfs.rs b/src/sqlite/json_udfs.rs similarity index 97% rename from df_extras_sqlite/src/json_udfs.rs rename to src/sqlite/json_udfs.rs index 78a388e..8945b58 100644 --- a/df_extras_sqlite/src/json_udfs.rs +++ b/src/sqlite/json_udfs.rs @@ -59,13 +59,11 @@ pub fn json_valid(args: &[ArrayRef]) -> Result { #[cfg(feature = "sqlite")] #[cfg(test)] mod tests { + use crate::common::test_utils::set_up_json_data_test; + use crate::sqlite::register_sqlite_udfs; use datafusion::assert_batches_sorted_eq; use datafusion::prelude::SessionContext; - use common::test_utils::set_up_json_data_test; - - use crate::register_udfs; - use super::*; #[tokio::test] @@ -150,7 +148,7 @@ mod tests { fn register_udfs_for_test() -> Result { let ctx = set_up_json_data_test()?; - register_udfs(&ctx)?; + register_sqlite_udfs(&ctx)?; Ok(ctx) } } diff --git a/df_extras_sqlite/src/lib.rs b/src/sqlite/mod.rs similarity index 90% rename from df_extras_sqlite/src/lib.rs rename to src/sqlite/mod.rs index 47953ed..6fc1932 100644 --- a/df_extras_sqlite/src/lib.rs +++ b/src/sqlite/mod.rs @@ -2,7 +2,7 @@ mod json_udfs; -use crate::json_udfs::{json, json_valid}; +use crate::sqlite::json_udfs::{json, json_valid}; use datafusion::arrow::datatypes::DataType::{UInt8, Utf8}; use datafusion::error::Result; use datafusion::logical_expr::{ReturnTypeFunction, ScalarUDF, Signature, Volatility}; @@ -10,7 +10,7 @@ use datafusion::physical_expr::functions::make_scalar_function; use datafusion::prelude::SessionContext; use std::sync::Arc; -pub fn register_udfs(ctx: &SessionContext) -> Result<()> { +pub fn register_sqlite_udfs(ctx: &SessionContext) -> Result<()> { register_json(ctx); register_json_valid(ctx); Ok(()) diff --git a/df_extras_postgres/README.md b/supports/postgres.md similarity index 100% rename from df_extras_postgres/README.md rename to supports/postgres.md diff --git a/supports/sqlite.md b/supports/sqlite.md new file mode 100644 index 0000000..e69de29