Skip to content

Commit

Permalink
Merge pull request #16 from fivetran/example/unioned-data
Browse files Browse the repository at this point in the history
Example for unioning source data
  • Loading branch information
fivetran-joemarkiewicz authored Jun 23, 2021
2 parents 6e2dab2 + c91a5eb commit f611ddd
Show file tree
Hide file tree
Showing 14 changed files with 82 additions and 43 deletions.
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,19 @@ vars:
shopify_schema: your_schema_name
```
If you have multiple Shopify connectors in Fivetran and would like to use this package on all of them simultaneously, we have provided functionality to do so. The package will union all of the data together and pass the unioned table into the transformations. You will be able to see which source it came from in the `source_relation` column of each model. To use this functionality, you will need to set either the `union_schemas` or `union_databases` variables:

```yml
# dbt_project.yml
...
config-version: 2
vars:
union_schema: ['shopify_usa','shopify_canada'] # use this if the data is in different schemas/datasets of the same database/project
union_databases: ['shopify_usa','shopify_canada'] # use this if the data is in different databases/projects but uses the same schema name
```

### Changing the Build Schema
By default this package will build the Shopify staging models within a schema titled (<target_schema> + `_stg_shopify`) and the Shopify final models within a schema titled (<target_schema> + `_shopify`) in your target database. If this is not where you would like your modeled Shopify data to be written to, add the following configuration to your `dbt_project.yml` file:

Expand Down
2 changes: 1 addition & 1 deletion dbt_project.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

name: 'shopify'
version: '1.2.0'
version: '0.4.0'
config-version: 2

require-dbt-version: [">=0.18.0", "<0.20.0"]
Expand Down
2 changes: 1 addition & 1 deletion integration_tests/dbt_project.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: 'shopify_integration_tests'
version: '0.2.1'
version: '0.4.0'
profile: 'integration_tests'
config-version: 2

Expand Down
20 changes: 10 additions & 10 deletions models/intermediate/intermediate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@ version: 2

models:
- name: shopify__customers__order_aggregates
columns:
- name: customer_id
tests:
- unique
- not_null
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- customer_id
- source_relation
- name: shopify__orders__order_line_aggregates
columns:
- name: order_id
tests:
- unique
- not_null
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- order_id
- source_relation
- name: shopify__orders__order_refunds
5 changes: 3 additions & 2 deletions models/intermediate/shopify__customers__order_aggregates.sql
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ with orders as (

select
orders.customer_id,
orders.source_relation,
min(orders.created_timestamp) as first_order_timestamp,
max(orders.created_timestamp) as most_recent_order_timestamp,
avg(case when lower(transactions.kind) in ('sale','capture') then transactions.currency_exchange_calculated_amount end) as average_order_value,
Expand All @@ -32,9 +33,9 @@ with orders as (
count(distinct orders.order_id) as lifetime_count_orders
from orders
left join transactions
using (order_id)
using (order_id, source_relation)
where customer_id is not null
group by 1
group by 1,2

)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@ with order_line as (

select
order_id,
source_relation,
count(*) as line_item_count
from order_line
group by 1
group by 1,2

)

Expand Down
2 changes: 2 additions & 0 deletions models/intermediate/shopify__orders__order_refunds.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ with refunds as (
refunds.created_at,
refunds.order_id,
refunds.user_id,
refunds.source_relation,
order_line_refunds.order_line_id,
order_line_refunds.restock_type,
order_line_refunds.quantity,
Expand All @@ -24,6 +25,7 @@ with refunds as (
from refunds
left join order_line_refunds
on refunds.refund_id = order_line_refunds.refund_id
and refunds.source_relation = order_line_refunds.source_relation

)

Expand Down
40 changes: 25 additions & 15 deletions models/shopify.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ models:

- name: shopify__orders
description: Each record represents an order in Shopify.
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- order_id
- source_relation
columns:
- name: _fivetran_synced
description: "{{ doc('_fivetran_synced') }}"
Expand Down Expand Up @@ -94,9 +99,6 @@ models:
- name: fulfillment_status
description: The order's status in terms of fulfilled line items.
- name: order_id
tests:
- unique
- not_null
description: The ID of the order, used for API purposes. This is different from the order_number property, which is the ID used by the shop owner and customer.
- name: landing_site_base_url
description: The URL for the page where the buyer landed when they entered the shop.
Expand Down Expand Up @@ -202,6 +204,11 @@ models:

- name: shopify__customers
description: Each record represents a customer in Shopify.
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- customer_id
- source_relation
columns:
- name: _fivetran_synced
description: "{{ doc('_fivetran_synced') }}"
Expand All @@ -216,9 +223,6 @@ models:
- name: first_name
description: The customer's first name.
- name: customer_id
tests:
- unique
- not_null
description: A unique identifier for the customer.
- name: last_name
description: The customer's last name.
Expand Down Expand Up @@ -249,6 +253,11 @@ models:

- name: shopify__products
description: Each record represents a product in Shopify.
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- product_id
- source_relation
columns:
- name: _fivetran_deleted
description: Whether the record has been deleted in the source system.
Expand All @@ -259,9 +268,6 @@ models:
- name: handle
description: A unique human-friendly string for the product. Automatically generated from the product's title.
- name: product_id
tests:
- unique
- not_null
description: An unsigned 64-bit integer that's used as a unique identifier for the product. Each id is unique across the Shopify system. No two products will have the same id, even if they're from different shops.
- name: product_type
description: A categorization for the product used for filtering and searching products.
Expand Down Expand Up @@ -290,6 +296,11 @@ models:

- name: shopify__order_lines
description: Each record represents a line item of an order in Shopify.
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- order_line_id
- source_relation
columns:
- name: _fivetran_synced
description: "{{ doc('_fivetran_synced') }}"
Expand All @@ -304,9 +315,6 @@ models:
- name: grams
description: The weight of the item in grams.
- name: order_line_id
tests:
- unique
- not_null
description: The ID of the line item.
- name: name
description: The name of the product variant.
Expand Down Expand Up @@ -389,11 +397,13 @@ models:

- name: shopify__transactions
description: Each record represents a transaction in Shopify.
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- transaction_id
- source_relation
columns:
- name: transaction_id
tests:
- unique
- not_null
description: The ID for the transaction.
- name: order_id
description: The ID for the order that the transaction is associated with.
Expand Down
11 changes: 7 additions & 4 deletions models/shopify__customer_cohorts.sql
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ with calendar as (
calendar.date_day as date_month,
customers.customer_id,
customers.first_order_timestamp,
customers.source_relation,
{{ dbt_utils.date_trunc('month', 'first_order_timestamp') }} as cohort_month
from calendar
inner join customers
Expand All @@ -32,32 +33,34 @@ with calendar as (
customer_calendar.customer_id,
customer_calendar.first_order_timestamp,
customer_calendar.cohort_month,
customer_calendar.source_relation,
coalesce(count(distinct orders.order_id), 0) as order_count_in_month,
coalesce(sum(orders.order_adjusted_total), 0) as total_price_in_month,
coalesce(sum(orders.line_item_count), 0) as line_item_count_in_month
from customer_calendar
left join orders
on customer_calendar.customer_id = orders.customer_id
and customer_calendar.source_relation = orders.source_relation
and customer_calendar.date_month = cast({{ dbt_utils.date_trunc('month', 'created_timestamp') }} as date)
group by 1,2,3,4
group by 1,2,3,4,5

), windows as (

{% set partition_string = 'partition by customer_id order by date_month rows between unbounded preceding and current row' %}
{% set partition_string = 'partition by customer_id, source_relation order by date_month rows between unbounded preceding and current row' %}

select
*,
sum(total_price_in_month) over ({{ partition_string }}) as total_price_lifetime,
sum(order_count_in_month) over ({{ partition_string }}) as order_count_lifetime,
sum(line_item_count_in_month) over ({{ partition_string }}) as line_item_count_lifetime,
row_number() over (partition by customer_id order by date_month asc) as cohort_month_number
row_number() over (partition by customer_id, source_relation order by date_month asc) as cohort_month_number
from orders_joined

), surrogate_key as (

select
*,
{{ dbt_utils.surrogate_key(['date_month','customer_id']) }} as customer_cohort_id
{{ dbt_utils.surrogate_key(['date_month','customer_id','source_relation']) }} as customer_cohort_id
from windows

)
Expand Down
2 changes: 1 addition & 1 deletion models/shopify__customers.sql
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ with customers as (
coalesce(orders.lifetime_count_orders, 0) as lifetime_count_orders
from customers
left join orders
using (customer_id)
using (customer_id, source_relation)

)

Expand Down
5 changes: 4 additions & 1 deletion models/shopify__order_lines.sql
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,11 @@ with order_lines as (

select
order_line_id,
source_relation,
sum(quantity) as quantity,
sum(coalesce(subtotal, 0)) as subtotal
from refunds
group by 1
group by 1,2

), joined as (

Expand Down Expand Up @@ -56,8 +57,10 @@ with order_lines as (
from order_lines
left join refunds_aggregated
on refunds_aggregated.order_line_id = order_lines.order_line_id
and refunds_aggregated.source_relation = order_lines.source_relation
left join product_variants
on product_variants.variant_id = order_lines.variant_id
and product_variants.source_relation = order_lines.source_relation

)

Expand Down
11 changes: 8 additions & 3 deletions models/shopify__orders.sql
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,20 @@ with orders as (
), refund_aggregates as (
select
order_id,
source_relation,
sum(subtotal) as refund_subtotal,
sum(total_tax) as refund_total_tax
from refunds
group by 1
group by 1,2

), order_adjustments_aggregates as (
select
order_id,
source_relation,
sum(amount) as order_adjustment_amount,
sum(tax_amount) as order_adjustment_tax_amount
from order_adjustments
group by 1
group by 1,2

), joined as (

Expand All @@ -48,16 +50,19 @@ with orders as (
from orders
left join order_lines
on orders.order_id = order_lines.order_id
and orders.source_relation = order_lines.source_relation
left join refund_aggregates
on orders.order_id = refund_aggregates.order_id
and orders.source_relation = refund_aggregates.source_relation
left join order_adjustments_aggregates
on orders.order_id = order_adjustments_aggregates.order_id
and orders.source_relation = order_adjustments_aggregates.source_relation

), windows as (

select
*,
row_number() over (partition by customer_id order by created_timestamp) as customer_order_seq_number
row_number() over (partition by customer_id, source_relation order by created_timestamp) as customer_order_seq_number
from joined

), new_vs_repeat as (
Expand Down
7 changes: 4 additions & 3 deletions models/shopify__products.sql
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ with products as (

select
order_lines.product_id,
order_lines.source_relation,
sum(order_lines.quantity) as quantity_sold,
sum(order_lines.pre_tax_price) as subtotal_sold,
sum(order_lines.quantity_net_refunds) as quantity_sold_net_refunds,
Expand All @@ -25,8 +26,8 @@ with products as (
max(orders.created_timestamp) as most_recent_order_timestamp
from order_lines
left join orders
using (order_id)
group by 1
using (order_id, source_relation)
group by 1,2

), joined as (

Expand All @@ -40,7 +41,7 @@ with products as (
order_lines_aggregated.most_recent_order_timestamp
from products
left join order_lines_aggregated
using (product_id)
using (product_id, source_relation)

)

Expand Down
2 changes: 1 addition & 1 deletion packages.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
packages:

- package: fivetran/shopify_source
version: [">=0.3.0","<0.4.0"]
version: [">=0.4.0","<0.5.0"]

0 comments on commit f611ddd

Please sign in to comment.