Skip to content

Commit

Permalink
Don't allow for additional properties for top level ODCS v2, add in e…
Browse files Browse the repository at this point in the history
…nd-to-end test for ODCS v3
  • Loading branch information
pflooky committed Oct 25, 2024
1 parent 8ed7d56 commit f57a1b8
Show file tree
Hide file tree
Showing 5 changed files with 289 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package io.github.datacatering.datacaterer.core.generator.metadata.datasource.op

import com.fasterxml.jackson.annotation.JsonIgnoreProperties

@JsonIgnoreProperties(ignoreUnknown = true)
case class OpenDataContractStandard(
dataset: Array[OpenDataContractStandardDataset],
datasetName: String,
Expand All @@ -12,9 +11,12 @@ case class OpenDataContractStandard(
uuid: String,
version: String,
apiVersion: Option[String] = None,
contractCreatedTs: Option[String] = None,
customProperties: Option[Array[OpenDataContractStandardCustomProperty]] = None,
database: Option[String] = None,
datasetDomain: Option[String] = None,
datasetKind: Option[String] = None,
datasetProject: Option[String] = None,
description: Option[OpenDataContractStandardDescription] = None,
driver: Option[String] = None,
driverVersion: Option[String] = None,
Expand All @@ -28,12 +30,14 @@ case class OpenDataContractStandard(
tags: Option[Array[String]] = None,
tenant: Option[String] = None,
`type`: Option[String] = None,
schedulerAppName: Option[String] = None,
server: Option[String] = None,
slaDefaultColumn: Option[String] = None,
slaProperties: Option[Array[OpenDataContractStandardServiceLevelAgreementProperty]] = None,
sourceSystem: Option[String] = None,
sourcePlatform: Option[String] = None,
stakeholders: Option[Array[OpenDataContractStandardStakeholder]] = None,
systemInstance: Option[String] = None,
username: Option[String] = None,
userConsumptionMode: Option[String] = None,
)
Expand Down Expand Up @@ -143,5 +147,5 @@ case class OpenDataContractStandardAuthoritativeDefinition(
/**
 * A single custom property entry, made up of a property name and its value.
 * Unknown JSON/YAML properties are ignored during deserialization.
 *
 * NOTE(review): both `value: String` and `value: Any` appear in this view; these look like the
 * before/after lines of a diff — confirm that only one `value` parameter remains in the real file.
 * Presumably `Any` is intended so that non-string values (e.g. lists) can be held — verify.
 */
@JsonIgnoreProperties(ignoreUnknown = true)
case class OpenDataContractStandardCustomProperty(
property: String,
value: String,
value: Any,
)
Original file line number Diff line number Diff line change
@@ -1,34 +1,51 @@
package io.github.datacatering.datacaterer.core.generator.metadata.datasource.opendatacontractstandard.model

import com.fasterxml.jackson.annotation.JsonIgnoreProperties
import com.fasterxml.jackson.core.`type`.TypeReference
import com.fasterxml.jackson.module.scala.JsonScalaEnumeration
import io.github.datacatering.datacaterer.core.generator.metadata.datasource.opendatacontractstandard.model


/**
 * Values allowed for the `kind` field of `OpenDataContractStandardV3`.
 *
 * The only member is `DataContract`. The `KindEnum` type alias lets fields be declared as
 * `KindEnum.KindEnum` rather than `KindEnum.Value`.
 */
object KindEnum extends Enumeration {
  val DataContract: Value = Value

  type KindEnum = Value
}

/** Jackson `TypeReference` for [[KindEnum]]; passed to `@JsonScalaEnumeration` on the `kind` field of `OpenDataContractStandardV3`. */
class KindEnumCls extends TypeReference[KindEnum.type]

/**
 * API versions usable for the `apiVersion` field of `OpenDataContractStandardV3`.
 *
 * NOTE(review): this view contains both a combined declaration of the four values and
 * per-value declarations with explicit ids and names for the same identifiers. These look like
 * the before/after lines of a diff — confirm only one form remains in the real file.
 * Presumably the explicit names are supplied because the dotted identifiers would not be
 * rendered as-is by reflection-based naming — verify.
 */
object ApiVersionEnum extends Enumeration {
val `v3.0.0`, `v2.2.2`, `v2.2.1`, `v2.2.0` = Value
type ApiVersionEnum = Value
val `v3.0.0`: model.ApiVersionEnum.Value = Value(0, "v3.0.0")
val `v2.2.2`: model.ApiVersionEnum.Value = Value(1, "v2.2.2")
val `v2.2.1`: model.ApiVersionEnum.Value = Value(2, "v2.2.1")
val `v2.2.0`: model.ApiVersionEnum.Value = Value(3, "v2.2.0")
}
/** Jackson `TypeReference` for [[ApiVersionEnum]]; passed to `@JsonScalaEnumeration` on the `apiVersion` field of `OpenDataContractStandardV3`. */
class ApiVersionEnumCls extends TypeReference[ApiVersionEnum.type]

/**
 * Server types usable for the `type` field of `OpenDataContractStandardServerV3`.
 *
 * NOTE(review): the value list appears twice in this view (once on a single line, once wrapped
 * over three lines). These look like the before/after lines of a diff — confirm only one
 * declaration remains in the real file.
 */
object ServerTypeEnum extends Enumeration {
val api, athena, azure, bigquery, clickhouse, databricks, denodo, dremio, duckdb, glue, cloudsql, db2, informix, kafka, kinesis, local, mysql, oracle, postgresql, postgres, presto, pubsub, redshift, s3, sftp, snowflake, sqlserver, synapse, trino, vertica, custom = Value
type ServerTypeEnum = Value
val api, athena, azure, bigquery, clickhouse, databricks, denodo, dremio, duckdb, glue, cloudsql, db2, informix,
kafka, kinesis, local, mysql, oracle, postgresql, postgres, presto, pubsub, redshift, s3, sftp, snowflake,
sqlserver, synapse, trino, vertica, custom = Value
}
/** Jackson `TypeReference` for [[ServerTypeEnum]]; passed to `@JsonScalaEnumeration` on the `type` field of `OpenDataContractStandardServerV3`. */
class ServerTypeEnumCls extends TypeReference[ServerTypeEnum.type]

/**
 * Logical types usable for the `logicalType` field of `OpenDataContractStandardElementV3`.
 *
 * Members are declared one per line in the same order as before, so the automatically assigned
 * ids (0 to 6) and the names are unchanged.
 */
object LogicalTypeEnum extends Enumeration {
  type LogicalTypeEnum = Value

  val string: Value = Value
  val date: Value = Value
  val number: Value = Value
  val integer: Value = Value
  val `object`: Value = Value
  val array: Value = Value
  val boolean: Value = Value
}
/** Jackson `TypeReference` for [[LogicalTypeEnum]]; passed to `@JsonScalaEnumeration` on the `logicalType` field of `OpenDataContractStandardElementV3`. */
class LogicalTypeEnumCls extends TypeReference[LogicalTypeEnum.type]

/**
 * Data quality rule types usable for the `type` field of `OpenDataContractStandardDataQualityV3`.
 *
 * Members are declared one per line in the same order as before, so the automatically assigned
 * ids (0 to 3) and the names are unchanged.
 */
object DataQualityTypeEnum extends Enumeration {
  type DataQualityTypeEnum = Value

  val text: Value = Value
  val library: Value = Value
  val sql: Value = Value
  val custom: Value = Value
}
/** Jackson `TypeReference` for [[DataQualityTypeEnum]]; passed to `@JsonScalaEnumeration` on the `type` field of `OpenDataContractStandardDataQualityV3`. */
class DataQualityTypeEnumCls extends TypeReference[DataQualityTypeEnum.type]

@JsonIgnoreProperties(ignoreUnknown = true)
case class OpenDataContractStandardV3(
apiVersion: ApiVersionEnum.Value,
@JsonScalaEnumeration(classOf[ApiVersionEnumCls]) apiVersion: ApiVersionEnum.ApiVersionEnum,
id: String,
kind: KindEnum.Value,
@JsonScalaEnumeration(classOf[KindEnumCls]) kind: KindEnum.KindEnum,
status: String,
version: String,
contractCreatedTs: Option[String] = None,
Expand Down Expand Up @@ -63,7 +80,7 @@ case class OpenDataContractStandardV3(
@JsonIgnoreProperties(ignoreUnknown = true)
case class OpenDataContractStandardServerV3(
server: String,
`type`: ServerTypeEnum.Value,
@JsonScalaEnumeration(classOf[ServerTypeEnumCls]) `type`: ServerTypeEnum.ServerTypeEnum,
description: Option[String],
environment: Option[String],
roles: Option[Array[OpenDataContractStandardRole]],
Expand All @@ -90,7 +107,7 @@ case class OpenDataContractStandardSchemaV3(
@JsonIgnoreProperties(ignoreUnknown = true)
case class OpenDataContractStandardElementV3(
name: String,
logicalType: LogicalTypeEnum.Value,
@JsonScalaEnumeration(classOf[LogicalTypeEnumCls]) logicalType: LogicalTypeEnum.LogicalTypeEnum,
physicalType: String,
authoritativeDefinitions: Option[Array[OpenDataContractStandardAuthoritativeDefinition]] = None,
businessName: Option[String] = None,
Expand Down Expand Up @@ -136,7 +153,7 @@ case class OpenDataContractStandardLogicalTypeOptionsV3(

@JsonIgnoreProperties(ignoreUnknown = true)
case class OpenDataContractStandardDataQualityV3(
`type`: DataQualityTypeEnum.Value,
@JsonScalaEnumeration(classOf[DataQualityTypeEnumCls]) `type`: DataQualityTypeEnum.DataQualityTypeEnum,
authoritativeDefinitions: Option[Array[OpenDataContractStandardAuthoritativeDefinition]] = None,
businessImpact: Option[String] = None,
code: Option[String] = None,
Expand Down
233 changes: 233 additions & 0 deletions app/src/test/resources/sample/metadata/odcs/full-example-v3.odcs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
# What's this data contract about?
domain: seller # Domain
dataProduct: my quantum # Data product name
version: 1.1.0 # Version (follows semantic versioning)
status: current
id: 53581432-6c55-4ba2-a65f-72344a91553a

# Lots of information
description:
purpose: Views built on top of the seller tables.
limitations: Data based on seller perspective, no buyer information
usage: Predict sales over time
tenant: ClimateQuantumInc

kind: DataContract
apiVersion: v3.0.0 # Standard version (follows semantic versioning)

# Infrastructure & servers
servers:
- server: my-postgres
type: postgres
host: localhost
port: 5432
database: pypl-edw
schema: pp_access_views

# Dataset, schema and quality
schema:
- name: tbl
physicalName: tbl_1
physicalType: table
description: Provides core payment metrics
authoritativeDefinitions:
- url: https://catalog.data.gov/dataset/air-quality
type: businessDefinition
- url: https://youtu.be/jbY1BKFj9ec
type: videoTutorial
tags: [ ]
dataGranularityDescription: Aggregation on columns txn_ref_dt, pmt_txn_id
properties:
- name: txn_ref_dt
primaryKey: false
primaryKeyPosition: -1
businessName: transaction reference date
logicalType: date
physicalType: date
required: false
description: Reference date for transaction
partitioned: true
partitionKeyPosition: 1
criticalDataElement: false
tags: [ ]
classification: public
transformSourceObjects:
- table_name_1
- table_name_2
- table_name_3
transformLogic: sel t1.txn_dt as txn_ref_dt from table_name_1 as t1, table_name_2 as t2, table_name_3 as t3 where t1.txn_dt=date-3
transformDescription: defines the logic in business terms; logic for dummies
examples:
- "2022-10-03"
- "2020-01-28"
customProperties:
- property: anonymizationStrategy
value: none
- name: rcvr_id
primaryKey: true
primaryKeyPosition: 1
businessName: receiver id
logicalType: string
physicalType: varchar(18)
required: false
description: A description for column rcvr_id.
partitioned: false
partitionKeyPosition: -1
criticalDataElement: false
tags: [ ]
classification: restricted
- name: rcvr_cntry_code
primaryKey: false
primaryKeyPosition: -1
businessName: receiver country code
logicalType: string
physicalType: varchar(2)
required: false
description: Country code
partitioned: false
partitionKeyPosition: -1
criticalDataElement: false
tags: [ ]
classification: public
authoritativeDefinitions:
- url: https://collibra.com/asset/742b358f-71a5-4ab1-bda4-dcdba9418c25
type: businessDefinition
- url: https://github.com/myorg/myrepo
type: transformationImplementation
- url: jdbc:postgresql://localhost:5432/adventureworks/tbl_1/rcvr_cntry_code
type: implementation
encryptedName: rcvr_cntry_code_encrypted
quality:
- rule: nullCheck
description: column should not contain null values
dimension: completeness # dropdown 7 values
type: library
severity: error
businessImpact: operational
schedule: 0 20 * * *
scheduler: cron
customProperties:
- property: FIELD_NAME
value:
- property: COMPARE_TO
value:
- property: COMPARISON_TYPE
value: Greater than
quality:
- rule: countCheck
type: library
description: Ensure row count is within expected volume range
dimension: completeness
method: reconciliation
severity: error
businessImpact: operational
schedule: 0 20 * * *
scheduler: cron
customProperties:
- property: business-key
value:
- txn_ref_dt
- rcvr_id


# Pricing
price:
priceAmount: 9.95
priceCurrency: USD
priceUnit: megabyte


# Team
team:
- username: ceastwood
role: Data Scientist
dateIn: "2022-08-02"
dateOut: "2022-10-01"
replacedByUsername: mhopper
- username: mhopper
role: Data Scientist
dateIn: "2022-10-01"
- username: daustin
role: Owner
comment: Keeper of the grail
dateIn: "2022-10-01"


# Roles
roles:
- role: microstrategy_user_opr
access: read
firstLevelApprovers: Reporting Manager
secondLevelApprovers: 'mandolorian'
- role: bq_queryman_user_opr
access: read
firstLevelApprovers: Reporting Manager
secondLevelApprovers: na
- role: risk_data_access_opr
access: read
firstLevelApprovers: Reporting Manager
secondLevelApprovers: 'dathvador'
- role: bq_unica_user_opr
access: write
firstLevelApprovers: Reporting Manager
secondLevelApprovers: 'mickey'

# SLA
slaDefaultElement: tab1.txn_ref_dt
slaProperties:
- property: latency # Property, see list of values in DP QoS
value: 4
unit: d # d, day, days for days; y, yr, years for years
element: tab1.txn_ref_dt # This would not be needed as it is the same table.column as the default one
- property: generalAvailability
value: "2022-05-12T09:30:10-08:00"
- property: endOfSupport
value: "2032-05-12T09:30:10-08:00"
- property: endOfLife
value: "2042-05-12T09:30:10-08:00"
- property: retention
value: 3
unit: y
element: tab1.txn_ref_dt
- property: frequency
value: 1
valueExt: 1
unit: d
element: tab1.txn_ref_dt
- property: timeOfAvailability
value: 09:00-08:00
element: tab1.txn_ref_dt
driver: regulatory # Describes the importance of the SLA: [regulatory|analytics|operational|...]
- property: timeOfAvailability
value: 08:00-08:00
element: tab1.txn_ref_dt
driver: analytics


# Support
support:
- channel: '#product-help' # Simple Slack communication channel
tool: slack
url: https://aidaug.slack.com/archives/C05UZRSBKLY
- channel: datacontract-ann # Simple distribution list
tool: email
url: mailto:[email protected]
- channel: Feedback # Product Feedback
description: General Product Feedback (Public)
url: https://product-feedback.com

# Tags
tags:
- transactions


# Custom properties
customProperties:
- property: refRulesetName
value: gcsc.ruleset.name
- property: somePropertyName
value: property.value
- property: dataprocClusterName # Used for specific applications like Elevate
value: [ cluster name ]

contractCreatedTs: "2022-11-15T02:59:43+00:00"
Loading

0 comments on commit f57a1b8

Please sign in to comment.