Use camel case for metadata source types and column name validations
pflooky committed Jul 18, 2024
Parent: 7d86517 · Commit: d249a35
Showing 7 changed files with 41 additions and 39 deletions.
README.md (2 changes: 1 addition & 1 deletion)

````diff
@@ -48,7 +48,7 @@ and deep dive into issues [from the generated report](https://data.catering/samp
 3. [Linux download](https://nightly.link/data-catering/data-caterer/workflows/build/main/data-caterer-linux.zip)
 4. Docker
    ```shell
-   docker run -d -i -p 9898:9898 -e DEPLOY_MODE=standalone --name datacaterer datacatering/data-caterer-basic:0.11.7
+   docker run -d -i -p 9898:9898 -e DEPLOY_MODE=standalone --name datacaterer datacatering/data-caterer-basic:0.11.9
    ```
    [Open localhost:9898](http://localhost:9898).
````
Constants.scala

```diff
@@ -310,13 +310,13 @@ object Constants {
   lazy val METADATA_SOURCE_HAS_OPEN_LINEAGE_SUPPORT = "metadataSourceHasOpenLineageSupport"
   lazy val METADATA_SOURCE_URL = "metadataSourceUrl"
   lazy val MARQUEZ = "marquez"
-  lazy val OPEN_METADATA = "open_metadata"
-  lazy val OPEN_API = "open_api"
-  lazy val GREAT_EXPECTATIONS = "great_expectations"
-  lazy val OPEN_DATA_CONTRACT_STANDARD = "open_data_contract_standard"
+  lazy val OPEN_METADATA = "openMetadata"
+  lazy val OPEN_API = "openApi"
+  lazy val GREAT_EXPECTATIONS = "greatExpectations"
+  lazy val OPEN_DATA_CONTRACT_STANDARD = "openDataContractStandard"
   lazy val AMUNDSEN = "amundsen"
   lazy val DATAHUB = "datahub"
-  lazy val DEFAULT_METADATA_SOURCE_NAME = "default_metadata_source"
+  lazy val DEFAULT_METADATA_SOURCE_NAME = "defaultMetadataSource"

   //alert source
   lazy val SLACK = "slack"
@@ -467,10 +467,10 @@ object Constants {
   lazy val VALIDATION_SUPPORTING_OPTIONS = List(VALIDATION_COLUMN, VALIDATION_FIELD, VALIDATION_MIN, VALIDATION_MAX, VALIDATION_GROUP_BY_COLUMNS, VALIDATION_DESCRIPTION, VALIDATION_ERROR_THRESHOLD)

   lazy val VALIDATION_PREFIX_JOIN_EXPRESSION = "expr:"
-  lazy val VALIDATION_COLUMN_NAME_COUNT_EQUAL = "column_count_equal"
-  lazy val VALIDATION_COLUMN_NAME_COUNT_BETWEEN = "column_count_between"
-  lazy val VALIDATION_COLUMN_NAME_MATCH_ORDER = "column_name_match_order"
-  lazy val VALIDATION_COLUMN_NAME_MATCH_SET = "column_name_match_set"
+  lazy val VALIDATION_COLUMN_NAME_COUNT_EQUAL = "columnCountEqual"
+  lazy val VALIDATION_COLUMN_NAME_COUNT_BETWEEN = "columnCountBetween"
+  lazy val VALIDATION_COLUMN_NAME_MATCH_ORDER = "columnNameMatchOrder"
+  lazy val VALIDATION_COLUMN_NAME_MATCH_SET = "columnNameMatchSet"

   //configuration names
   //flags config
```
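
These string values are identifiers referenced by YAML validation files and the UI (see the files below), so plans written against earlier releases must be updated to the camelCase form. A minimal sketch of the effect, as a hypothetical check rather than code from this commit:

```scala
// Hypothetical check, not part of this commit: only the camelCase
// identifiers defined above are recognised after this change.
object CamelCaseIdentifiers extends App {
  val validColumnNameTypes = Set(
    "columnCountEqual", "columnCountBetween",
    "columnNameMatchOrder", "columnNameMatchSet"
  )

  assert(validColumnNameTypes.contains("columnCountEqual"))
  // Old snake_case identifiers from pre-0.11.9 plans no longer match.
  assert(!validColumnNameTypes.contains("column_count_equal"))
}
```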
```diff
@@ -1,12 +1,12 @@
 package io.github.datacatering.datacaterer.api.parser

 import com.fasterxml.jackson.annotation.JsonTypeInfo.Id
-import com.fasterxml.jackson.core.{JsonGenerator, JsonParser}
+import com.fasterxml.jackson.core.JsonGenerator
 import com.fasterxml.jackson.databind.jsontype.impl.TypeIdResolverBase
-import com.fasterxml.jackson.databind.{DatabindContext, DeserializationContext, JavaType, JsonDeserializer, JsonSerializer, SerializerProvider}
+import com.fasterxml.jackson.databind.{DatabindContext, JavaType, JsonSerializer, SerializerProvider}
 import io.github.datacatering.datacaterer.api.ValidationBuilder
 import io.github.datacatering.datacaterer.api.model.Constants.{VALIDATION_COLUMN_NAME_COUNT_BETWEEN, VALIDATION_COLUMN_NAME_COUNT_EQUAL, VALIDATION_COLUMN_NAME_MATCH_ORDER, VALIDATION_COLUMN_NAME_MATCH_SET}
-import io.github.datacatering.datacaterer.api.model.{ColumnNamesValidation, ExpressionValidation, GroupByValidation, UpstreamDataSourceValidation, Validation}
+import io.github.datacatering.datacaterer.api.model.{ColumnNamesValidation, ExpressionValidation, GroupByValidation, UpstreamDataSourceValidation}

 import scala.util.Try
```
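
The dropped imports (`JsonParser`, `DeserializationContext`, `JsonDeserializer`, `Validation`) suggest the file now only serialises validations. A rough sketch of what such a serialiser can look like, built from the retained imports; this is an illustrative shape, not the project's actual implementation:

```scala
import com.fasterxml.jackson.core.JsonGenerator
import com.fasterxml.jackson.databind.{JsonSerializer, SerializerProvider}
import io.github.datacatering.datacaterer.api.model.ColumnNamesValidation
import io.github.datacatering.datacaterer.api.model.Constants.VALIDATION_COLUMN_NAME_COUNT_EQUAL

// Illustrative only: write the camelCase identifier (e.g. "columnCountEqual")
// when serialising a column-names validation to JSON.
class ColumnNamesValidationSketch extends JsonSerializer[ColumnNamesValidation] {
  override def serialize(value: ColumnNamesValidation, gen: JsonGenerator, serializers: SerializerProvider): Unit = {
    gen.writeStartObject()
    gen.writeStringField("columnNameType", VALIDATION_COLUMN_NAME_COUNT_EQUAL)
    gen.writeEndObject()
  }
}
```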
app/src/main/resources/application.conf (34 changes: 18 additions & 16 deletions)

```diff
@@ -32,6 +32,8 @@ folders {
   planFilePath = ${?PLAN_FILE_PATH}
   taskFolderPath = "app/src/test/resources/sample/task"
   taskFolderPath = ${?TASK_FOLDER_PATH}
+  validationFolderPath = "app/src/test/resources/sample/validation"
+  validationFolderPath = ${?VALIDATION_FOLDER_PATH}
   recordTrackingFolderPath = "/tmp/data/generated/recordTracking"
   recordTrackingFolderPath = ${?RECORD_TRACKING_FOLDER_PATH}
   recordTrackingForValidationFolderPath = "/tmp/data/validation/recordTracking"
@@ -80,22 +82,22 @@ runtime {
   master = "local[*]"
   master = ${?DATA_CATERER_MASTER}
   config {
-    "spark.sql.cbo.enabled": "true",
-    "spark.sql.adaptive.enabled": "true",
-    "spark.sql.cbo.planStats.enabled": "true",
-    "spark.sql.legacy.allowUntypedScalaUDF": "true",
-    "spark.sql.legacy.allowParameterlessCount": "true",
-    "spark.sql.statistics.histogram.enabled": "true",
-    "spark.sql.shuffle.partitions": "10",
-    "spark.sql.catalog.postgres": "",
-    "spark.sql.catalog.cassandra": "com.datastax.spark.connector.datasource.CassandraCatalog",
-    "spark.sql.catalog.iceberg": "org.apache.iceberg.spark.SparkCatalog",
-    "spark.sql.catalog.iceberg.type": "hadoop",
-    "spark.hadoop.fs.s3a.directory.marker.retention": "keep",
-    "spark.hadoop.fs.s3a.bucket.all.committer.magic.enabled": "true",
-    "spark.hadoop.fs.hdfs.impl": "org.apache.hadoop.hdfs.DistributedFileSystem",
-    "spark.hadoop.fs.file.impl": "com.globalmentor.apache.hadoop.fs.BareLocalFileSystem",
-    "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension,org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions"
+    "spark.sql.cbo.enabled" = "true",
+    "spark.sql.adaptive.enabled" = "true",
+    "spark.sql.cbo.planStats.enabled" = "true",
+    "spark.sql.legacy.allowUntypedScalaUDF" = "true",
+    "spark.sql.legacy.allowParameterlessCount" = "true",
+    "spark.sql.statistics.histogram.enabled" = "true",
+    "spark.sql.shuffle.partitions" = "10",
+    "spark.sql.catalog.postgres" = "",
+    "spark.sql.catalog.cassandra" = "com.datastax.spark.connector.datasource.CassandraCatalog",
+    "spark.sql.catalog.iceberg" = "org.apache.iceberg.spark.SparkCatalog",
+    "spark.sql.catalog.iceberg.type" = "hadoop",
+    "spark.hadoop.fs.s3a.directory.marker.retention" = "keep",
+    "spark.hadoop.fs.s3a.bucket.all.committer.magic.enabled" = "true",
+    "spark.hadoop.fs.hdfs.impl" = "org.apache.hadoop.hdfs.DistributedFileSystem",
+    "spark.hadoop.fs.file.impl" = "com.globalmentor.apache.hadoop.fs.BareLocalFileSystem",
+    "spark.sql.extensions" = "io.delta.sql.DeltaSparkSessionExtension,org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions"
   }
 }
```
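
The separator change from `:` to `=` in the runtime block is stylistic: HOCON accepts both interchangeably. A small sketch with the Typesafe Config library (assumed available, as it is the usual parser for `.conf` files like this) demonstrating the equivalence:

```scala
import com.typesafe.config.ConfigFactory

object HoconSeparators extends App {
  // HOCON treats ':' and '=' as equivalent key-value separators,
  // so both snippets parse to the same configuration tree.
  val withColon  = ConfigFactory.parseString("""config { "spark.sql.shuffle.partitions": "10" }""")
  val withEquals = ConfigFactory.parseString("""config { "spark.sql.shuffle.partitions" = "10" }""")
  assert(withColon == withEquals)
}
```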
app/src/main/resources/ui/configuration-data.js (10 changes: 5 additions & 5 deletions)

```diff
@@ -1200,7 +1200,7 @@ dataSourcePropertiesMap.set("datahub", {
     }
   }
 });
-dataSourcePropertiesMap.set("great_expectations", {
+dataSourcePropertiesMap.set("greatExpectations", {
   optGroupLabel: "Metadata Source",
   Name: "Great Expectations",
   disabled: "",
@@ -1242,7 +1242,7 @@ dataSourcePropertiesMap.set("marquez", {
     }
   }
 });
-dataSourcePropertiesMap.set("open_api", {
+dataSourcePropertiesMap.set("openApi", {
   optGroupLabel: "Metadata Source",
   Name: "OpenAPI/Swagger",
   disabled: "",
@@ -1256,9 +1256,9 @@ dataSourcePropertiesMap.set("open_api", {
     }
   }
 });
-dataSourcePropertiesMap.set("open_data_contract_standard", {
+dataSourcePropertiesMap.set("openDataContractStandard", {
   optGroupLabel: "Metadata Source",
-  Name: "ODCS",
+  Name: "Open Data Contract Standard (ODCS)",
   disabled: "",
   properties: {
     path: {
@@ -1270,7 +1270,7 @@ dataSourcePropertiesMap.set("open_data_contract_standard", {
     }
   }
 });
-dataSourcePropertiesMap.set("open_metadata", {
+dataSourcePropertiesMap.set("openMetadata", {
   optGroupLabel: "Metadata Source",
   Name: "Open Metadata",
   disabled: "",
```
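
The UI keys now mirror the Scala constants, which keeps lookups consistent across the two layers. A hypothetical guard (not in this commit) that would catch future drift between them:

```scala
import io.github.datacatering.datacaterer.api.model.Constants.{GREAT_EXPECTATIONS, MARQUEZ, OPEN_API, OPEN_DATA_CONTRACT_STANDARD, OPEN_METADATA}

object UiKeySync extends App {
  // Metadata source keys used by dataSourcePropertiesMap in configuration-data.js above.
  val uiMetadataSourceKeys = Set("greatExpectations", "marquez", "openApi", "openDataContractStandard", "openMetadata")
  val scalaConstantValues = Set(GREAT_EXPECTATIONS, MARQUEZ, OPEN_API, OPEN_DATA_CONTRACT_STANDARD, OPEN_METADATA)
  assert(uiMetadataSourceKeys == scalaConstantValues, "UI metadata source keys drifted from Constants")
}
```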
```diff
@@ -18,14 +18,14 @@ dataSources:
       - groupByCols: ["account_id"]
         aggType: "count"
         aggExpr: "count == 1"
-      - columnNameType: "column_count_equal"
+      - columnNameType: "columnCountEqual"
         count: "3"
-      - columnNameType: "column_count_between"
+      - columnNameType: "columnCountBetween"
         minCount: "1"
         maxCount: "2"
-      - columnNameType: "column_name_match_order"
+      - columnNameType: "columnNameMatchOrder"
         names: ["account_id", "amount", "name"]
-      - columnNameType: "column_name_match_set"
+      - columnNameType: "columnNameMatchSet"
         names: ["account_id", "my_name"]
       - upstreamDataSource: "my_first_json"
         upstreamReadOptions: {}
```
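
For comparison, the same four checks expressed through the Scala API would look roughly like this (builder method names assumed from the `ValidationBuilder` API; the YAML above is the authoritative form):

```scala
import io.github.datacatering.datacaterer.api.ValidationBuilder

// Rough Scala-API equivalents of the four columnNameType entries above;
// the columnNames builder methods are assumed, not taken from this commit.
val columnNameValidations = List(
  ValidationBuilder().columnNames.countEqual(3),
  ValidationBuilder().columnNames.countBetween(1, 2),
  ValidationBuilder().columnNames.matchOrder("account_id", "amount", "name"),
  ValidationBuilder().columnNames.matchSet("account_id", "my_name")
)
```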
gradle.properties (2 changes: 1 addition & 1 deletion)

```diff
@@ -1,5 +1,5 @@
 groupId=io.github.data-catering
-version=0.11.8
+version=0.11.9

 scalaVersion=2.12
 scalaSpecificVersion=2.12.19
```
