Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add line item standardized model #83

Merged
merged 24 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# dbt_shopify v0.13.0
[PR #83](https://github.com/fivetran/dbt_shopify/pull/83) includes the following changes:

## Features
- Introduced the new `shopify__line_item_enhanced` model. This model includes order line items enriched with order, payment, customer, and refund information. This model has been built with the intention of retaining a common line item schema across all other Fivetran billing data models.

# dbt_shopify v0.12.2

[PR #84](https://github.com/fivetran/dbt_shopify/pull/84) includes the following changes:
Expand Down
2 changes: 1 addition & 1 deletion dbt_project.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: 'shopify'
version: '0.12.2'
version: '0.13.0'
config-version: 2
require-dbt-version: [">=1.3.0", "<2.0.0"]
models:
Expand Down
10 changes: 5 additions & 5 deletions integration_tests/ci/sample.profiles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ integration_tests:
pass: "{{ env_var('CI_REDSHIFT_DBT_PASS') }}"
dbname: "{{ env_var('CI_REDSHIFT_DBT_DBNAME') }}"
port: 5439
schema: shopify_integration_tests_8
schema: shopify_integration_tests_9
threads: 8
bigquery:
type: bigquery
method: service-account-json
project: 'dbt-package-testing'
schema: shopify_integration_tests_8
schema: shopify_integration_tests_9
threads: 8
keyfile_json: "{{ env_var('GCLOUD_SERVICE_KEY') | as_native }}"
snowflake:
Expand All @@ -33,7 +33,7 @@ integration_tests:
role: "{{ env_var('CI_SNOWFLAKE_DBT_ROLE') }}"
database: "{{ env_var('CI_SNOWFLAKE_DBT_DATABASE') }}"
warehouse: "{{ env_var('CI_SNOWFLAKE_DBT_WAREHOUSE') }}"
schema: shopify_integration_tests_8
schema: shopify_integration_tests_9
threads: 8
postgres:
type: postgres
Expand All @@ -42,13 +42,13 @@ integration_tests:
pass: "{{ env_var('CI_POSTGRES_DBT_PASS') }}"
dbname: "{{ env_var('CI_POSTGRES_DBT_DBNAME') }}"
port: 5432
schema: shopify_integration_tests_8
schema: shopify_integration_tests_9
threads: 8
databricks:
catalog: "{{ env_var('CI_DATABRICKS_DBT_CATALOG') }}"
host: "{{ env_var('CI_DATABRICKS_DBT_HOST') }}"
http_path: "{{ env_var('CI_DATABRICKS_DBT_HTTP_PATH') }}"
schema: shopify_integration_tests_8
schema: shopify_integration_tests_9
threads: 8
token: "{{ env_var('CI_DATABRICKS_DBT_TOKEN') }}"
type: databricks
Expand Down
10 changes: 5 additions & 5 deletions integration_tests/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: 'shopify_integration_tests'
version: '0.12.2'
version: '0.13.0'
profile: 'integration_tests'
config-version: 2

Expand All @@ -11,7 +11,7 @@ clean-targets: # directories to be removed by `dbt clean`
vars:
# shopify_using_fulfillment_event: true # set to true when regenerating docs
# shopify_using_all_metafields: true # set to true when regenerating docs
shopify_schema: shopify_integration_tests_8
shopify_schema: shopify_integration_tests_9
shopify_source:
shopify_customer_identifier: "shopify_customer_data"
shopify_order_line_refund_identifier: "shopify_order_line_refund_data"
Expand Down Expand Up @@ -53,9 +53,9 @@ dispatch:
search_order: ['spark_utils', 'dbt_utils']

models:
shopify:
+schema: "{{ 'shopify_integrations_tests_sqlw' if target.name == 'databricks-sql' else 'shopify' }}"
# +schema: "shopify_{{ var('directed_schema','dev') }}"
# shopify:
# +schema: "{{ 'shopify_integrations_tests_sqlw' if target.name == 'databricks-sql' else 'shopify' }}"
+schema: "shopify_{{ var('directed_schema','dev') }}"

seeds:
shopify_integration_tests:
Expand Down
fivetran-joemarkiewicz marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{{ config(
tags="fivetran_validations",
enabled=var('fivetran_validation_tests_enabled', false)
) }}

with prod as (
select *
from {{ target.schema }}_shopify_prod.shopify__line_item_enhanced
),

dev as (
select *
from {{ target.schema }}_shopify_dev.shopify__line_item_enhanced
),

final as (
-- test will fail if any rows from prod are not found in dev
(select * from prod
except distinct
select * from dev)

union all -- union since we only care if rows are produced

-- test will fail if any rows from dev are not found in prod
(select * from dev
except distinct
select * from prod)
)

select *
from final
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{{ config(
tags="fivetran_validations",
enabled=var('fivetran_validation_tests_enabled', false)
) }}

-- this test is to make sure the rows counts are the same between versions
with prod as (
select count(*) as prod_rows
from {{ target.schema }}_shopify_prod.shopify__line_item_enhanced
),

dev as (
select count(*) as dev_rows
from {{ target.schema }}_shopify_dev.shopify__line_item_enhanced
)

-- test will return values and fail if the row counts don't match
select *
from prod
join dev
on prod.prod_rows != dev.dev_rows
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{{ config(
tags="fivetran_validations",
enabled=var('fivetran_validation_tests_enabled', false)
) }}

-- this test is to make sure there is no fanout between the staging order_line_table and the line_item_enhanced model.
with stg_order_line as (
select
1 as join_key,
count(*) as order_line_count,
count(distinct order_id) as order_count
from {{ ref('stg_shopify__order_line') }}
),

line_item_enhanced as (
select
1 as join_key,
count(*) as line_item_enhanced_count
from {{ ref('shopify__line_item_enhanced') }}
),

-- test will return values and fail if the row counts don't match

final as (
select
stg_order_line.join_key,
stg_order_line.order_line_count + stg_order_line.order_count as total_line_and_order_count,
line_item_enhanced.line_item_enhanced_count
from stg_order_line
join line_item_enhanced
on stg_order_line.join_key = line_item_enhanced.join_key
)

select *
from final
where total_line_and_order_count != line_item_enhanced_count
200 changes: 200 additions & 0 deletions models/common_data_models/shopify__line_item_enhanced.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
with line_items as (

select *
from {{ var('shopify_order_line')}}

), orders as (

select *
from {{ var('shopify_order')}}

), product as (

select *
from {{ var('shopify_product')}}

), transactions as (

select
order_id,
kind,
source_relation,
{{ fivetran_utils.string_agg("cast (transaction_id as " ~ dbt.type_string() ~ ")", "', '") }} AS transaction_id,
{{ fivetran_utils.string_agg("cast (processed_timestamp as " ~ dbt.type_string() ~ ")", "', '") }} as processed_timestamp,
{{ fivetran_utils.string_agg('gateway', "', '") }} as gateway

from {{ var('shopify_transaction')}}
where kind = 'capture'
and status = 'success'
group by 1,2,3

), refund_transactions as (

select
order_id,
source_relation,
sum(amount) as total_order_refund_amount
from {{ var('shopify_transaction')}}
where kind = 'refund'
group by 1,2

), order_line_refund as ( -- There is a unique row for each individual item. So if quantity = 4 of 1 line item, then each has its own row

select
order_line_id,
source_relation,
sum(subtotal + total_tax) as total_refund_amount
from {{ var('shopify_order_line_refund')}}
group by 1,2

), customer as (

select *
from {{ var('shopify_customer')}}

), shipping as (

select *
from {{ ref('int_shopify__order__shipping_aggregates')}}

), enhanced as (

select
li.order_id as header_id,
li.order_line_id as line_item_id,
li.index as line_item_index,
o.created_timestamp as created_at,
o.currency as currency,
o.fulfillment_status as header_status,
li.product_id as product_id,
p.title as product_name,
t.kind as transaction_type,
null as billing_type,
p.product_type as product_type,
li.quantity as quantity,
li.price as unit_amount,
li.total_discount as discount_amount,
o.total_tax as tax_amount,
fivetran-joemarkiewicz marked this conversation as resolved.
Show resolved Hide resolved
(li.quantity * li.price) as total_amount,
t.transaction_id as payment_id,
null as payment_method_id,
t.gateway as payment_method, -- payment_method in tender_transaction can be something like 'apply_pay', where gateway is like 'gift card' or 'shopify payments' which I think is more relevant here
t.processed_timestamp as payment_at,
shipping.discounted_shipping_price + shipping.shipping_tax as fee_amount,
rt.total_order_refund_amount as refund_amount,
null as subscription_id,
null as subscription_period_started_at,
null as subscription_period_ended_at,
null as subscription_status,
o.customer_id,
'customer' as customer_level,
{{ dbt.concat(["c.first_name", "''", "c.last_name"]) }} as customer_name,
o.shipping_address_company as customer_company,
o.email as customer_email,
o.shipping_address_city as customer_city,
o.shipping_address_country as customer_country,
li.source_relation
from line_items li
left join orders o
on li.order_id = o.order_id
and li.source_relation = o.source_relation
left join transactions t
on o.order_id = t.order_id
and o.source_relation = t.source_relation
left join refund_transactions rt
on o.order_id = rt.order_id
and o.source_relation = rt.source_relation
left join product p
on li.product_id = p.product_id
and li.source_relation = p.source_relation
left join customer c
on o.customer_id = c.customer_id
and o.source_relation = c.source_relation
left join shipping
on o.order_id = shipping.order_id
and o.source_relation = shipping.source_relation

), final as (

select
header_id,
cast(line_item_id as {{ dbt.type_numeric() }}) as line_item_id,
cast(line_item_index as {{ dbt.type_numeric() }}) as line_item_index,
'line_item' as record_type,
created_at,
currency,
header_status,
billing_type,
cast(product_id as {{ dbt.type_numeric() }}) as product_id,
product_name,
product_type,
cast(quantity as {{ dbt.type_numeric() }}) as quantity,
cast(unit_amount as {{ dbt.type_numeric() }}) as unit_amount,
cast(null as {{ dbt.type_numeric() }}) as discount_amount,
cast(null as {{ dbt.type_numeric() }}) as tax_amount,
fivetran-joemarkiewicz marked this conversation as resolved.
Show resolved Hide resolved
cast(total_amount as {{ dbt.type_numeric() }}) as total_amount,
payment_id,
payment_method_id,
payment_method,
payment_at,
cast(null as {{ dbt.type_numeric() }}) as fee_amount,
cast(null as {{ dbt.type_numeric() }}) as refund_amount,
subscription_id,
subscription_period_started_at,
subscription_period_ended_at,
subscription_status,
customer_id,
customer_level,
customer_name,
customer_company,
customer_email,
customer_city,
customer_country,
source_relation
from enhanced

union all

select
header_id,
cast(null as {{ dbt.type_numeric() }}) as line_item_id,
cast(0 as {{ dbt.type_numeric() }}) as line_item_index,
'header' as record_type,
created_at,
currency,
header_status,
billing_type,
cast(null as {{ dbt.type_numeric() }}) as product_id,
cast(null as {{ dbt.type_string() }}) as product_name,
cast(null as {{ dbt.type_string() }}) as product_type,
cast(null as {{ dbt.type_numeric() }}) as quantity,
cast(null as {{ dbt.type_numeric() }}) as unit_amount,
discount_amount,
tax_amount,
cast(null as {{ dbt.type_numeric() }}) as total_amount,
payment_id,
payment_method_id,
payment_method,
payment_at,
fee_amount,
refund_amount,
subscription_id,
subscription_period_started_at,
subscription_period_ended_at,
subscription_status,
customer_id,
customer_level,
customer_name,
customer_company,
customer_email,
customer_city,
customer_country,
source_relation
from enhanced
where line_item_index = 1 -- filter to just one arbitrary record

)

select *
from final
order by header_id, line_item_index asc, source_relation
Loading