From 20fbd6ae12da6501c80738391275449d528b98d6 Mon Sep 17 00:00:00 2001 From: jnadal Date: Fri, 23 Aug 2024 14:33:02 +0200 Subject: [PATCH 1/2] added base code for running in a glue environement --- macros/utils/cross_db_utils/file_format.sql | 11 +++++ .../cross_db_utils/incremental_strategy.sql | 4 ++ macros/utils/run_queries/run_query.sql | 43 +++++++++++++++++++ .../table_operations/delete_and_insert.sql | 35 +++++++++++++++ .../table_operations/make_temp_relation.sql | 4 ++ .../data_monitoring_metrics.sql | 1 + .../schema_columns_snapshot.sql | 1 + models/edr/dbt_artifacts/dbt_columns.sql | 3 +- models/edr/dbt_artifacts/dbt_exposures.sql | 3 +- models/edr/dbt_artifacts/dbt_invocations.sql | 1 + models/edr/dbt_artifacts/dbt_metrics.sql | 3 +- models/edr/dbt_artifacts/dbt_models.sql | 1 + models/edr/dbt_artifacts/dbt_run_results.sql | 1 + models/edr/dbt_artifacts/dbt_seeds.sql | 1 + models/edr/dbt_artifacts/dbt_snapshots.sql | 1 + models/edr/dbt_artifacts/dbt_sources.sql | 3 +- models/edr/dbt_artifacts/dbt_tests.sql | 1 + .../dbt_source_freshness_results.sql | 1 + .../run_results/elementary_test_results.sql | 1 + models/edr/run_results/test_result_rows.sql | 1 + 20 files changed, 116 insertions(+), 4 deletions(-) create mode 100644 macros/utils/cross_db_utils/file_format.sql diff --git a/macros/utils/cross_db_utils/file_format.sql b/macros/utils/cross_db_utils/file_format.sql new file mode 100644 index 000000000..48310b43c --- /dev/null +++ b/macros/utils/cross_db_utils/file_format.sql @@ -0,0 +1,11 @@ +{% macro get_default_file_format() %} + {% do return(adapter.dispatch("get_default_file_format", "elementary")()) %} +{% endmacro %} + +{%- macro glue__get_default_file_format() %} + {% do return("delta") %} +{% endmacro %} + +{% macro default__get_default_file_format() %} + {% do return(none) %} +{% endmacro %} diff --git a/macros/utils/cross_db_utils/incremental_strategy.sql b/macros/utils/cross_db_utils/incremental_strategy.sql index c6ef526bd..8da0b40ba 100644 --- a/macros/utils/cross_db_utils/incremental_strategy.sql +++ b/macros/utils/cross_db_utils/incremental_strategy.sql @@ -13,3 +13,7 @@ {% macro default__get_default_incremental_strategy() %} {% do return(none) %} {% endmacro %} + +{%- macro glue__get_default_incremental_strategy() %} + {% do return("merge") %} +{% endmacro %} diff --git a/macros/utils/run_queries/run_query.sql b/macros/utils/run_queries/run_query.sql index 656c16940..689ebfb1f 100644 --- a/macros/utils/run_queries/run_query.sql +++ b/macros/utils/run_queries/run_query.sql @@ -1,4 +1,8 @@ {% macro run_query(query, lowercase_column_names=True) %} + {{ return(adapter.dispatch('run_query', 'elementary')(query, lowercase_column_names)) }} +{% endmacro %} + +{% macro default__run_query(query, lowercase_column_names=True) %} {% set query_result = dbt.run_query(query) %} {% if lowercase_column_names %} {% set lowercased_column_names = {} %} @@ -10,3 +14,42 @@ {% do return(query_result) %} {% endmacro %} + +{% macro glue__run_query(query, lowercase_column_names=True) %} + -- Glue does not support running queries that does not return results through the `run_query` method + -- We need to check if the query is a DDL or DML statement and run it using the `run_query_statement` statement + -- There are other statements that should not get query results, but keeping it simple for now + + {% set should_not_get_query_results_statements = ["create table", "create or replace table", "insert into"] %} + + {% set should_not_get_query_results = [] %} + {% for statement in should_not_get_query_results_statements %} + {% if statement in query.lower() %} + {% do should_not_get_query_results.append(True) %} + {% endif %} + {% endfor %} + + {% if should_not_get_query_results | length > 0 %} + -- Morover, Glue throws an error if the query contains escaped single quotes in single quotes strings + {% set curracted_query = query.replace("\\'","") %} + + {% call statement("run_query_statement", fetch_result=false, auto_begin=false) %} + {{ curracted_query }} + {% endcall %} + + {% else %} + {% set query_result = dbt.run_query(query) %} + + {% if lowercase_column_names %} + {% set lowercased_column_names = {} %} + {% for column_name in query_result.column_names %} + {% do lowercased_column_names.update({column_name: column_name.lower()}) %} + {% endfor %} + {% set query_result = query_result.rename(lowercased_column_names) %} + {% endif %} + + {% do return(query_result) %} + {% endif %} + +{% endmacro %} + diff --git a/macros/utils/table_operations/delete_and_insert.sql b/macros/utils/table_operations/delete_and_insert.sql index 4672ef116..dd854f4cf 100644 --- a/macros/utils/table_operations/delete_and_insert.sql +++ b/macros/utils/table_operations/delete_and_insert.sql @@ -87,6 +87,41 @@ {% do return(queries) %} {% endmacro %} + +{% macro glue__get_delete_and_insert_queries(relation, insert_relation, delete_relation, delete_column_key) %} + {% set queries = [] %} + + {# Calling `is_delta` raises an error if `metadata` is None - https://github.com/databricks/dbt-databricks/blob/33dca4b66b05f268741030b33659d34ff69591c1/dbt/adapters/databricks/relation.py#L71 #} + {{ log("get delete has delete_relation "~delete_relation~" relation.metadata "~relation.metadata~" relation.is_delta "~relation.is_delta, info=true) }} + {% if delete_relation and relation.is_delta %} + {% set delete_query %} + merge into {{ relation }} as source + using {{ delete_relation }} as target + on (source.{{ delete_column_key }} = target.{{ delete_column_key }}) or source.{{ delete_column_key }} is null + when matched then delete; + {% endset %} + {% do queries.append(delete_query) %} + + {% elif delete_relation %} + {% set delete_query %} + delete from {{ relation }} + where + {{ delete_column_key }} is null + or {{ delete_column_key }} in (select {{ delete_column_key }} from {{ delete_relation }}); + {% endset %} + {% do queries.append(delete_query) %} + {% endif %} + + {% if insert_relation %} + {% set insert_query %} + insert into {{ relation }} select * from {{ insert_relation }}; + {% endset %} + {% do queries.append(insert_query) %} + {% endif %} + + {% do return(queries) %} +{% endmacro %} + {% macro athena__get_delete_and_insert_queries(relation, insert_relation, delete_relation, delete_column_key) %} {% set queries = [] %} diff --git a/macros/utils/table_operations/make_temp_relation.sql b/macros/utils/table_operations/make_temp_relation.sql index 971184f35..3f7fdbc88 100644 --- a/macros/utils/table_operations/make_temp_relation.sql +++ b/macros/utils/table_operations/make_temp_relation.sql @@ -16,6 +16,10 @@ {% do return(tmp_relation) %} {% endmacro %} +{% macro glue__edr_make_temp_relation(base_relation, suffix) %} + {% do return(dbt.make_temp_relation(base_relation, suffix)) %} +{% endmacro %} + {% macro databricks__edr_make_temp_relation(base_relation, suffix) %} {% do return(dbt.make_temp_relation(base_relation, suffix)) %} {% endmacro %} diff --git a/models/edr/data_monitoring/data_monitoring/data_monitoring_metrics.sql b/models/edr/data_monitoring/data_monitoring/data_monitoring_metrics.sql index 3930efee7..6f6485453 100644 --- a/models/edr/data_monitoring/data_monitoring/data_monitoring_metrics.sql +++ b/models/edr/data_monitoring/data_monitoring/data_monitoring_metrics.sql @@ -9,6 +9,7 @@ "prev_timestamp_column": "updated_at", }, table_type=elementary.get_default_table_type(), + file_format=elementary.get_default_file_format(), incremental_strategy=elementary.get_default_incremental_strategy(), ) }} diff --git a/models/edr/data_monitoring/schema_changes/schema_columns_snapshot.sql b/models/edr/data_monitoring/schema_changes/schema_columns_snapshot.sql index ff0c07ac5..8c393470f 100644 --- a/models/edr/data_monitoring/schema_changes/schema_columns_snapshot.sql +++ b/models/edr/data_monitoring/schema_changes/schema_columns_snapshot.sql @@ -9,6 +9,7 @@ "prev_timestamp_column": "detected_at", }, table_type=elementary.get_default_table_type(), + file_format=elementary.get_default_file_format(), incremental_strategy=elementary.get_default_incremental_strategy() ) }} diff --git a/models/edr/dbt_artifacts/dbt_columns.sql b/models/edr/dbt_artifacts/dbt_columns.sql index dbef17e22..dc098f3bd 100644 --- a/models/edr/dbt_artifacts/dbt_columns.sql +++ b/models/edr/dbt_artifacts/dbt_columns.sql @@ -7,8 +7,9 @@ on_schema_change='sync_all_columns', full_refresh=elementary.get_config_var('elementary_full_refresh'), table_type=elementary.get_default_table_type(), + file_format=elementary.get_default_file_format(), incremental_strategy=elementary.get_default_incremental_strategy() - ) + ) }} {{ elementary.get_dbt_columns_empty_table_query() }} diff --git a/models/edr/dbt_artifacts/dbt_exposures.sql b/models/edr/dbt_artifacts/dbt_exposures.sql index e55769cf2..ae1ab3ed2 100644 --- a/models/edr/dbt_artifacts/dbt_exposures.sql +++ b/models/edr/dbt_artifacts/dbt_exposures.sql @@ -7,8 +7,9 @@ on_schema_change='sync_all_columns', full_refresh=elementary.get_config_var('elementary_full_refresh'), table_type=elementary.get_default_table_type(), + file_format=elementary.get_default_file_format(), incremental_strategy=elementary.get_default_incremental_strategy() - ) + ) }} {{ elementary.get_dbt_exposures_empty_table_query() }} diff --git a/models/edr/dbt_artifacts/dbt_invocations.sql b/models/edr/dbt_artifacts/dbt_invocations.sql index 157ac6f15..b85420a30 100644 --- a/models/edr/dbt_artifacts/dbt_invocations.sql +++ b/models/edr/dbt_artifacts/dbt_invocations.sql @@ -10,6 +10,7 @@ "prev_timestamp_column": "generated_at", }, table_type=elementary.get_default_table_type(), + file_format=elementary.get_default_file_format(), incremental_strategy=elementary.get_default_incremental_strategy() ) }} diff --git a/models/edr/dbt_artifacts/dbt_metrics.sql b/models/edr/dbt_artifacts/dbt_metrics.sql index 3f64bb7ad..28fb64d83 100644 --- a/models/edr/dbt_artifacts/dbt_metrics.sql +++ b/models/edr/dbt_artifacts/dbt_metrics.sql @@ -7,8 +7,9 @@ on_schema_change='sync_all_columns', full_refresh=elementary.get_config_var('elementary_full_refresh'), table_type=elementary.get_default_table_type(), + file_format=elementary.get_default_file_format(), incremental_strategy=elementary.get_default_incremental_strategy() - ) + ) }} {{ elementary.get_dbt_metrics_empty_table_query() }} diff --git a/models/edr/dbt_artifacts/dbt_models.sql b/models/edr/dbt_artifacts/dbt_models.sql index 80bb711c7..2b80b5cbe 100644 --- a/models/edr/dbt_artifacts/dbt_models.sql +++ b/models/edr/dbt_artifacts/dbt_models.sql @@ -7,6 +7,7 @@ on_schema_change='sync_all_columns', full_refresh=elementary.get_config_var('elementary_full_refresh'), table_type=elementary.get_default_table_type(), + file_format=elementary.get_default_file_format(), incremental_strategy=elementary.get_default_incremental_strategy() ) }} diff --git a/models/edr/dbt_artifacts/dbt_run_results.sql b/models/edr/dbt_artifacts/dbt_run_results.sql index 7f7731496..3c7d1fb02 100644 --- a/models/edr/dbt_artifacts/dbt_run_results.sql +++ b/models/edr/dbt_artifacts/dbt_run_results.sql @@ -11,6 +11,7 @@ "prev_timestamp_column": "generated_at", }, table_type=elementary.get_default_table_type(), + file_format=elementary.get_default_file_format(), incremental_strategy=elementary.get_default_incremental_strategy() ) }} diff --git a/models/edr/dbt_artifacts/dbt_seeds.sql b/models/edr/dbt_artifacts/dbt_seeds.sql index 2c58751f3..7e564dfb0 100644 --- a/models/edr/dbt_artifacts/dbt_seeds.sql +++ b/models/edr/dbt_artifacts/dbt_seeds.sql @@ -7,6 +7,7 @@ on_schema_change='sync_all_columns', full_refresh=elementary.get_config_var('elementary_full_refresh'), table_type=elementary.get_default_table_type(), + file_format=elementary.get_default_file_format(), incremental_strategy=elementary.get_default_incremental_strategy() ) }} diff --git a/models/edr/dbt_artifacts/dbt_snapshots.sql b/models/edr/dbt_artifacts/dbt_snapshots.sql index c662c48fa..3e0fe202b 100644 --- a/models/edr/dbt_artifacts/dbt_snapshots.sql +++ b/models/edr/dbt_artifacts/dbt_snapshots.sql @@ -7,6 +7,7 @@ on_schema_change='sync_all_columns', full_refresh=elementary.get_config_var('elementary_full_refresh'), table_type=elementary.get_default_table_type(), + file_format=elementary.get_default_file_format(), incremental_strategy=elementary.get_default_incremental_strategy() ) }} diff --git a/models/edr/dbt_artifacts/dbt_sources.sql b/models/edr/dbt_artifacts/dbt_sources.sql index 1f5c1022a..548081446 100644 --- a/models/edr/dbt_artifacts/dbt_sources.sql +++ b/models/edr/dbt_artifacts/dbt_sources.sql @@ -7,8 +7,9 @@ on_schema_change='sync_all_columns', full_refresh=elementary.get_config_var('elementary_full_refresh'), table_type=elementary.get_default_table_type(), + file_format=elementary.get_default_file_format(), incremental_strategy=elementary.get_default_incremental_strategy() - ) + ) }} {{ elementary.get_dbt_sources_empty_table_query() }} diff --git a/models/edr/dbt_artifacts/dbt_tests.sql b/models/edr/dbt_artifacts/dbt_tests.sql index 46159388a..4bdaaacb3 100644 --- a/models/edr/dbt_artifacts/dbt_tests.sql +++ b/models/edr/dbt_artifacts/dbt_tests.sql @@ -7,6 +7,7 @@ on_schema_change='sync_all_columns', full_refresh=elementary.get_config_var('elementary_full_refresh'), table_type=elementary.get_default_table_type(), + file_format=elementary.get_default_file_format(), incremental_strategy=elementary.get_default_incremental_strategy() ) }} diff --git a/models/edr/run_results/dbt_source_freshness_results.sql b/models/edr/run_results/dbt_source_freshness_results.sql index 9afbf56e8..24dc797c6 100644 --- a/models/edr/run_results/dbt_source_freshness_results.sql +++ b/models/edr/run_results/dbt_source_freshness_results.sql @@ -9,6 +9,7 @@ "prev_timestamp_column": "generated_at", }, table_type=elementary.get_default_table_type(), + file_format=elementary.get_default_file_format(), incremental_strategy=elementary.get_default_incremental_strategy() ) }} diff --git a/models/edr/run_results/elementary_test_results.sql b/models/edr/run_results/elementary_test_results.sql index 5fdb06825..076f41778 100644 --- a/models/edr/run_results/elementary_test_results.sql +++ b/models/edr/run_results/elementary_test_results.sql @@ -9,6 +9,7 @@ "prev_timestamp_column": "detected_at", }, table_type=elementary.get_default_table_type(), + file_format=elementary.get_default_file_format(), incremental_strategy=elementary.get_default_incremental_strategy() ) }} diff --git a/models/edr/run_results/test_result_rows.sql b/models/edr/run_results/test_result_rows.sql index f28bf0450..96416a8fe 100644 --- a/models/edr/run_results/test_result_rows.sql +++ b/models/edr/run_results/test_result_rows.sql @@ -11,6 +11,7 @@ "prev_timestamp_column": "detected_at", }, table_type=elementary.get_default_table_type(), + file_format=elementary.get_default_file_format(), incremental_strategy=elementary.get_default_incremental_strategy() ) }} From 6a516f900acc49b58ac19cd8d6646e45dcd21da2 Mon Sep 17 00:00:00 2001 From: jnadal Date: Fri, 23 Aug 2024 17:36:26 +0200 Subject: [PATCH 2/2] new changes for edr report --- macros/utils/run_queries/run_query.sql | 4 ++- .../table_operations/create_temp_table.sql | 27 ++++++++++++++++++- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/macros/utils/run_queries/run_query.sql b/macros/utils/run_queries/run_query.sql index 689ebfb1f..c198187fc 100644 --- a/macros/utils/run_queries/run_query.sql +++ b/macros/utils/run_queries/run_query.sql @@ -20,7 +20,9 @@ -- We need to check if the query is a DDL or DML statement and run it using the `run_query_statement` statement -- There are other statements that should not get query results, but keeping it simple for now - {% set should_not_get_query_results_statements = ["create table", "create or replace table", "insert into"] %} + {#{{ log("want to run "~query, info=true) }}#} + + {% set should_not_get_query_results_statements = ["create table", "create or replace table", "insert into", "create view", "create temporary view", "create or replace temporary view"] %} {% set should_not_get_query_results = [] %} {% for statement in should_not_get_query_results_statements %} diff --git a/macros/utils/table_operations/create_temp_table.sql b/macros/utils/table_operations/create_temp_table.sql index a846e41ab..31ec3a51f 100644 --- a/macros/utils/table_operations/create_temp_table.sql +++ b/macros/utils/table_operations/create_temp_table.sql @@ -1,4 +1,8 @@ {% macro create_temp_table(database_name, schema_name, table_name, sql_query) %} + {% do return(adapter.dispatch("create_temp_table", "elementary")(database_name, schema_name, table_name, sql_query)) %} +{% endmacro %} + +{% macro default__create_temp_table(database_name, schema_name, table_name, sql_query) %} {% set temp_table_exists, temp_table_relation = dbt.get_or_create_relation(database=database_name, schema=schema_name, identifier=table_name, @@ -11,4 +15,25 @@ {% do elementary.run_query(dbt.create_table_as(True, temp_table_relation, sql_query)) %} {% endif %} {{ return(temp_table_relation) }} -{% endmacro %} \ No newline at end of file +{% endmacro %} + +{% macro glue__create_temp_table(database_name, schema_name, table_name, sql_query) %} + -- Apparently, inside glue, cannot query temporary tables + -- Moroever, there is a problem inside edr_make_temp_relation, we need to enforce no suffix + -- It is a problem because for the moment, it tries to get relation without suffix + -- and it puts none as suffix if not specified maybe the function is not well named + {% set temp_table_exists, temp_table_relation = dbt.get_or_create_relation(database=database_name, + schema=schema_name, + identifier=table_name, + type='table') -%} + {% set temp_table_relation = elementary.edr_make_temp_relation(temp_table_relation, suffix="") %} + + {% if temp_table_exists %} + {% do adapter.drop_relation(temp_table_relation) %} + {% do elementary.run_query(dbt.create_table_as(False, temp_table_relation, sql_query)) %} + {% else %} + {% do elementary.run_query(dbt.create_table_as(False, temp_table_relation, sql_query)) %} + {% endif %} + {{ return(temp_table_relation) }} +{% endmacro %} +