Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modify fct_model_directories model to handle staging model dependent on multiple sources #525

Draft
wants to merge 12 commits into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 37 additions & 1 deletion models/marts/structure/fct_model_directories.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
-- This model finds all cases where a model is NOT in the appropriate subdirectory:
-- For staging models: The files should be in nested in the staging folder in a subfolder that matches their source parent's name.
-- For staging models: The files should be nested in the staging folder in a subfolder that matches their source parent's name.
-- For non-staging models: The files should be nested closest to their appropriate folder.
-- Build this model as a table.
{{ config(materialized='table') }}

-- directory_pattern is spliced between path segments wherever a suggested file path is built below.
-- Its value comes from get_directory_pattern(), which is defined outside the visible part of this file
-- (presumably the OS-appropriate path separator; confirm against the macro).
{% set directory_pattern = get_directory_pattern() %}

with all_graph_resources as (
Expand Down Expand Up @@ -32,6 +34,38 @@ staging_models as (
and child_model_type = 'staging'
),

-- For every staging model, count how many distinct sources it selects from and build the
-- suggested staging subdirectory for each of those sources (one output row per model + source).
-- The count is taken per model, over de-duplicated (model, source) pairs. Grouping by the
-- source-specific path instead would put exactly one source in every group, so the count
-- would always be 1 and the "more than one source" filter below could never match.
staging_by_parent_source_count as (
    select
        -- number of distinct, non-null sources feeding this staging model
        count(model_sources.parent_source_name) over (
            partition by
                model_sources.child,
                model_sources.child_resource_type,
                model_sources.child_model_type,
                model_sources.child_file_path
        ) as resource_count,
        model_sources.child as resource_name,
        model_sources.child_resource_type as resource_type,
        model_sources.child_model_type as model_type,
        model_sources.child_file_path as current_file_path,
        'models{{ directory_pattern }}staging{{ directory_pattern }}' || model_sources.parent_source_name || '{{ directory_pattern }}' as list_agg_string
    from (
        -- collapse multiple graph paths between the same model and source into a single row
        select distinct
            child,
            child_resource_type,
            child_model_type,
            child_file_path,
            parent_source_name
        from staging_models
    ) as model_sources
),

-- Staging models that depend on more than one source: list every suggested subdirectory and
-- advise splitting the model into one staging model per source.
-- Reads only from staging_by_parent_source_count, which already holds one row per (model, source);
-- joining back to staging_models would repeat each suggested path in the aggregated message.
multiple_sources_staging_to_split as (
    select
        resource_name,
        resource_type,
        model_type,
        current_file_path,
        -- order by the aggregated path itself so the message is deterministic; ordering is only
        -- requested on the adapters the original condition listed
        'More than one source. Split into separate staging models in: ' ||
        {{ dbt.listagg(measure='list_agg_string', delimiter_text="' AND '", order_by_clause='order by list_agg_string' if target.type in ['snowflake','redshift','duckdb','trino']) }} as change_file_path_to
    from staging_by_parent_source_count
    where resource_count > 1
    group by
        resource_name,
        resource_type,
        model_type,
        current_file_path
),

-- find all staging models that are NOT in their source parent's subdirectory
inappropriate_subdirectories_staging as (
select distinct -- must do distinct to avoid duplicates when staging model has multiple paths to a given source
Expand Down Expand Up @@ -63,6 +97,8 @@ unioned as (
select * from inappropriate_subdirectories_staging
union all
select * from innappropriate_subdirectories_non_staging_models
union all --Added union all to append these results
select * from multiple_sources_staging_to_split
)

select * from unioned
Expand Down
Loading