From 44aa42396c3d04a40467d8cdda64abc3b226b59f Mon Sep 17 00:00:00 2001 From: Nick Ross Date: Thu, 19 Oct 2023 11:46:41 +0100 Subject: [PATCH 1/2] fix: use convenience method for pipeline status --- dataworkspace/dataworkspace/apps/datasets/views.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dataworkspace/dataworkspace/apps/datasets/views.py b/dataworkspace/dataworkspace/apps/datasets/views.py index c53ddccbf6..14599c7071 100644 --- a/dataworkspace/dataworkspace/apps/datasets/views.py +++ b/dataworkspace/dataworkspace/apps/datasets/views.py @@ -125,7 +125,6 @@ from dataworkspace.apps.eventlog.models import EventLog from dataworkspace.apps.eventlog.utils import log_event, log_permission_change from dataworkspace.apps.explorer.utils import invalidate_data_explorer_user_cached_credentials -from dataworkspace.datasets_db import get_pipeline_last_success_date logger = logging.getLogger("app") @@ -432,8 +431,8 @@ def _get_user_tools_access(self) -> bool: return user_has_tools_access def _get_pipeline_info(self, source_table): - last_success_date = get_pipeline_last_success_date(source_table) - if last_success_date: + last_success_date = source_table.get_pipeline_last_success_date() + if last_success_date is not None: return abs((last_success_date - datetime.now()).days) return 0 From da8f45545c6bfdf50c2113023b8e22a4c19013fd Mon Sep 17 00:00:00 2001 From: Nick Ross Date: Thu, 19 Oct 2023 15:16:01 +0100 Subject: [PATCH 2/2] fix: add pipeline last success as bool --- .../dataworkspace/apps/datasets/views.py | 21 +++++----- dataworkspace/dataworkspace/datasets_db.py | 2 - dataworkspace/dataworkspace/tests/conftest.py | 39 ++++++++++++++++--- .../tests/datasets/test_views.py | 34 ++++++++++++++++ 4 files changed, 79 insertions(+), 17 deletions(-) diff --git a/dataworkspace/dataworkspace/apps/datasets/views.py b/dataworkspace/dataworkspace/apps/datasets/views.py index 14599c7071..689a2428d9 100644 --- a/dataworkspace/dataworkspace/apps/datasets/views.py +++ b/dataworkspace/dataworkspace/apps/datasets/views.py @@ -1,6 +1,5 @@ import json import logging -from datetime import datetime import uuid from collections import defaultdict, namedtuple from itertools import chain @@ -430,18 +429,18 @@ def _get_user_tools_access(self) -> bool: return user_has_tools_access - def _get_pipeline_info(self, source_table): - last_success_date = source_table.get_pipeline_last_success_date() - if last_success_date is not None: - return abs((last_success_date - datetime.now()).days) - return 0 - def _get_context_data_for_master_dataset(self, ctx, **kwargs): source_tables = sorted(self.object.sourcetable_set.all(), key=lambda x: x.name) MasterDatasetInfo = namedtuple( "MasterDatasetInfo", - ("source_table", "code_snippets", "columns", "tools_links", "pipeline_info"), + ( + "source_table", + "code_snippets", + "columns", + "tools_links", + "pipeline_last_run_succeeded", + ), ) master_datasets_info = [ MasterDatasetInfo( @@ -454,7 +453,8 @@ def _get_context_data_for_master_dataset(self, ctx, **kwargs): include_types=True, ), tools_links=get_tools_links_for_user(self.request.user, self.request.scheme), - pipeline_info=self._get_pipeline_info(source_table), + pipeline_last_run_succeeded=source_table.get_pipeline_last_run_state() + == "success", ) for source_table in sorted(source_tables, key=lambda x: x.name) ] @@ -484,6 +484,9 @@ def _get_context_data_for_master_dataset(self, ctx, **kwargs): and subscription.first().is_active(), "details": subscription.first(), }, + "all_pipeline_last_runs_succeeded": all( + (x.pipeline_last_run_succeeded for x in master_datasets_info) + ), } ) return ctx diff --git a/dataworkspace/dataworkspace/datasets_db.py b/dataworkspace/dataworkspace/datasets_db.py index b49adb33b1..978bed0db0 100644 --- a/dataworkspace/dataworkspace/datasets_db.py +++ b/dataworkspace/dataworkspace/datasets_db.py @@ -443,7 +443,6 @@ def get_pipeline_id_for_source_table(source_table): ); """ ).format( - Literal(DataSetType.DATACUT), Literal(source_table.schema), Literal(source_table.table), ) @@ -475,7 +474,6 @@ def get_last_run_state_for_pipeline(pipeline_name): ); """ ).format( - Literal(DataSetType.DATACUT), Literal(pipeline_name), ) ) diff --git a/dataworkspace/dataworkspace/tests/conftest.py b/dataworkspace/dataworkspace/tests/conftest.py index 42c4d5dc80..0c45783523 100644 --- a/dataworkspace/dataworkspace/tests/conftest.py +++ b/dataworkspace/dataworkspace/tests/conftest.py @@ -185,17 +185,44 @@ def metadata_db(db): data_type INTEGER NOT NULL, data_hash_v1 TEXT, primary_keys TEXT[], - number_of_rows INTEGER + number_of_rows INTEGER, + pipeline_name TEXT, + input_tables TEXT[] ); TRUNCATE TABLE dataflow.metadata; INSERT INTO dataflow.metadata ( - table_schema, table_name, source_data_modified_utc, dataflow_swapped_tables_utc, table_structure, data_type + table_schema, + table_name, + source_data_modified_utc, + dataflow_swapped_tables_utc, + table_structure, + data_type, + pipeline_name ) VALUES - ('public','table1','2020-09-02 00:01:00.0','2020-09-02 00:01:00.0','{"field1":"int","field2":"varchar"}',1), - ('public','table2','2020-09-01 00:01:00.0','2020-09-02 00:01:00.0',NULL,1), - ('public','table1','2020-01-01 00:01:00.0','2020-09-02 00:01:00.0',NULL,1), - ('public','table4', NULL,'2021-12-01 00:00:00.0',NULL,1); + ('public','table1','2020-09-02 00:01:00.0','2020-09-02 00:01:00.0', + '{"field1":"int","field2":"varchar"}',1,'Pipeline1'), + ('public','table2','2020-09-01 00:01:00.0','2020-09-02 00:01:00.0',NULL,1,'Pipeline2'), + ('public','table1','2020-01-01 00:01:00.0','2020-09-02 00:01:00.0',NULL,1,'Pipeline1'), + ('public','table4', NULL,'2021-12-01 00:00:00.0',NULL,1,NULL); + + CREATE TABLE IF NOT EXISTS dataflow.pipeline_dag_runs_v2 ( + pipeline_name TEXT, + pipeline_active TEXT, + final_state TEXT, + last_success_of_day TIMESTAMP, + run_end_date DATE + ); + TRUNCATE TABLE dataflow.pipeline_dag_runs_v2; + INSERT INTO dataflow.pipeline_dag_runs_v2 ( + pipeline_name, + pipeline_active, + final_state, + last_success_of_day, + run_end_date + ) VALUES + ('Pipeline1', 'active', 'success', '2020-09-02 00:01:00.0', '2020-09-02 00:01:00.0'), + ('Pipeline2', 'active', 'failed', '2020-09-02 00:01:00.0', '2020-09-02 00:01:00.0'); """ ) conn.commit() diff --git a/dataworkspace/dataworkspace/tests/datasets/test_views.py b/dataworkspace/dataworkspace/tests/datasets/test_views.py index ff33f4a8de..c50f602398 100644 --- a/dataworkspace/dataworkspace/tests/datasets/test_views.py +++ b/dataworkspace/dataworkspace/tests/datasets/test_views.py @@ -5084,3 +5084,37 @@ def test_delete(self, user, client, source_factory): ) assert response.status_code == 200 assert UserDataTableView.objects.count() == view_count - 1 + + +@pytest.mark.django_db +def test_master_dataset_detail_page_shows_pipeline_failures(client, metadata_db): + dataset = factories.DataSetFactory.create( + type=DataSetType.MASTER, + published=True, + user_access_type=UserAccessType.REQUIRES_AUTHORIZATION, + ) + factories.SourceTableFactory( + dataset=dataset, database=metadata_db, schema="public", table="table1" + ) + factories.SourceTableFactory( + dataset=dataset, database=metadata_db, schema="public", table="table2" + ) + + url = reverse("datasets:dataset_detail", args=(dataset.id,)) + response = client.get(url) + assert response.status_code == 200 + assert not response.context["all_pipeline_last_runs_succeeded"] + assert ( + len( + [ + x + for x in response.context["master_datasets_info"] + if not x.pipeline_last_run_succeeded + ] + ) + == 1 + ) + assert ( + len([x for x in response.context["master_datasets_info"] if x.pipeline_last_run_succeeded]) + == 1 + )