From 5e59d0e2058f5dd2a5ccfee34b86b13a648b801c Mon Sep 17 00:00:00 2001 From: Anna Geller Date: Wed, 6 Nov 2024 11:50:52 +0100 Subject: [PATCH] fix: stars blueprint and namespace names --- dwh-and-analytics-1.yaml | 93 ------------------------ dwh-and-analytics.yaml | 91 +++++++++++++++-------- failure-alert-sentry.yaml | 6 +- notify-about-github-stars-via-slack.yaml | 3 +- produce-to-rabbitmq.yaml | 2 +- send-email-with-attachment.yaml | 2 +- weaviate-load-and-query.yaml | 3 +- zenduty-failure-alert.yaml | 6 +- 8 files changed, 69 insertions(+), 137 deletions(-) delete mode 100644 dwh-and-analytics-1.yaml diff --git a/dwh-and-analytics-1.yaml b/dwh-and-analytics-1.yaml deleted file mode 100644 index ee2dc82..0000000 --- a/dwh-and-analytics-1.yaml +++ /dev/null @@ -1,93 +0,0 @@ -id: dwh-and-analytics -namespace: tutorial -description: Data Warehouse and Analytics -tasks: - - id: dbt - type: io.kestra.plugin.core.flow.WorkingDirectory - tasks: - - id: clone_repository - type: io.kestra.plugin.git.Clone - url: https://github.com/kestra-io/dbt-demo - branch: main - - id: dbt_build - type: io.kestra.plugin.dbt.cli.DbtCLI - taskRunner: - type: io.kestra.plugin.scripts.runner.docker.Docker - containerImage: ghcr.io/kestra-io/dbt-duckdb:latest - commands: - - dbt deps - - dbt build - profiles: | - jaffle_shop: - outputs: - dev: - type: duckdb - path: dbt.duckdb - extensions: - - parquet - fixed_retries: 1 - threads: 16 - timeout_seconds: 300 - target: dev - - id: python - type: io.kestra.plugin.scripts.python.Script - outputFiles: - - "*.csv" - taskRunner: - type: io.kestra.plugin.scripts.runner.docker.Docker - containerImage: ghcr.io/kestra-io/duckdb:latest - script: > - import duckdb - - import pandas as pd - - - conn = duckdb.connect(database='dbt.duckdb', read_only=False) - - - tables_query = "SELECT table_name FROM information_schema.tables WHERE - table_schema = 'main';" - - tables = conn.execute(tables_query).fetchall() - - - # Export each table to CSV, excluding tables that start with 'raw' or - 'stg' - - for table_name in tables: - table_name = table_name[0] - # Skip tables with names starting with 'raw' or 'stg' - if not table_name.startswith('raw') and not table_name.startswith('stg'): - query = f"SELECT * FROM {table_name}" - df = conn.execute(query).fetchdf() - df.to_csv(f"{table_name}.csv", index=False) - - conn.close() -extend: - title: Getting started with Kestra — a Data Warehouse and Analytics workflow example - description: >- - This flow is a simple example of a data warehouse and analytics use case. It - clones a dbt repository, builds the dbt project, and exports the data to CSV - files. - - - The flow has three tasks: - - 1. The first task clones a dbt repository. - - 2. The second task builds the dbt models and tests using DuckDB. - - 3. The third task exports the transformed data to CSV files. - tags: - - Getting Started - - Data - - dbt - - DuckDB - - Git - - Python - - Docker - ee: true - demo: true - meta_description: This flow is a simple example of a data warehouse and - analytics use case. It clones a dbt repository, builds the dbt project, and - exports the data to CSV files. diff --git a/dwh-and-analytics.yaml b/dwh-and-analytics.yaml index 45eecdc..ee2dc82 100644 --- a/dwh-and-analytics.yaml +++ b/dwh-and-analytics.yaml @@ -1,28 +1,24 @@ id: dwh-and-analytics -namespace: company.team -description: > - ## Data Platform - - Clone a [Git repository](https://github.com/kestra-io/dbt-example) and build - dbt models +namespace: tutorial +description: Data Warehouse and Analytics tasks: - id: dbt type: io.kestra.plugin.core.flow.WorkingDirectory tasks: - id: clone_repository type: io.kestra.plugin.git.Clone - url: https://github.com/kestra-io/dbt-example - branch: master + url: https://github.com/kestra-io/dbt-demo + branch: main - id: dbt_build type: io.kestra.plugin.dbt.cli.DbtCLI taskRunner: type: io.kestra.plugin.scripts.runner.docker.Docker containerImage: ghcr.io/kestra-io/dbt-duckdb:latest commands: - - dbt deps --project-dir dbt - - dbt build --project-dir dbt + - dbt deps + - dbt build profiles: | - my_dbt_project: + jaffle_shop: outputs: dev: type: duckdb @@ -32,35 +28,66 @@ tasks: fixed_retries: 1 threads: 16 timeout_seconds: 300 - target: dev -extend: - title: Git workflow for dbt — pull latest dbt changes from GitHub and build dbt - models from your dbt project directory - description: > - This flow does the following: + target: dev + - id: python + type: io.kestra.plugin.scripts.python.Script + outputFiles: + - "*.csv" + taskRunner: + type: io.kestra.plugin.scripts.runner.docker.Docker + containerImage: ghcr.io/kestra-io/duckdb:latest + script: > + import duckdb - 1. Clones the latest dbt code from - [GitHub](https://github.com/kestra-io/dbt-example/tree/master) + import pandas as pd - 2. Pulls a public [container image with the latest package - dependencies](https://github.com/kestra-io/examples/pkgs/container/dbt-duckdb) - 3. Runs dbt CLI commands in a Docker container. + conn = duckdb.connect(database='dbt.duckdb', read_only=False) - Note how the dbt CLI commands use the `--project-dir dbt` flag. This is - important because the [example GitHub project we - use](https://github.com/kestra-io/dbt-example/tree/master) has the dbt - project code located in a dedicated folder. + tables_query = "SELECT table_name FROM information_schema.tables WHERE + table_schema = 'main';" + tables = conn.execute(tables_query).fetchall() - If your dbt project is located outside of the root directory in your Git - repository structure, make sure to use the `--project-dir dbt` flag. + + # Export each table to CSV, excluding tables that start with 'raw' or + 'stg' + + for table_name in tables: + table_name = table_name[0] + # Skip tables with names starting with 'raw' or 'stg' + if not table_name.startswith('raw') and not table_name.startswith('stg'): + query = f"SELECT * FROM {table_name}" + df = conn.execute(query).fetchdf() + df.to_csv(f"{table_name}.csv", index=False) + + conn.close() +extend: + title: Getting started with Kestra — a Data Warehouse and Analytics workflow example + description: >- + This flow is a simple example of a data warehouse and analytics use case. It + clones a dbt repository, builds the dbt project, and exports the data to CSV + files. + + + The flow has three tasks: + + 1. The first task clones a dbt repository. + + 2. The second task builds the dbt models and tests using DuckDB. + + 3. The third task exports the transformed data to CSV files. tags: - - Git + - Getting Started + - Data - dbt - DuckDB - ee: false + - Git + - Python + - Docker + ee: true demo: true - meta_description: This flow clones a dbt Git repository from GitHub and then - runs dbt CLI commands in a Docker container. + meta_description: This flow is a simple example of a data warehouse and + analytics use case. It clones a dbt repository, builds the dbt project, and + exports the data to CSV files. diff --git a/failure-alert-sentry.yaml b/failure-alert-sentry.yaml index ef67e41..a744204 100644 --- a/failure-alert-sentry.yaml +++ b/failure-alert-sentry.yaml @@ -1,5 +1,5 @@ id: failure-alert-sentry -namespace: prod.monitoring +namespace: system tasks: - id: send_alert type: io.kestra.plugin.notifications.sentry.SentryExecution @@ -16,10 +16,10 @@ triggers: - FAILED - WARNING - type: io.kestra.plugin.core.condition.ExecutionNamespaceCondition - namespace: prod + namespace: company prefix: true extend: - title: Send an alert to Sentry when any flow fails in the production namespace + title: Send an alert to Sentry when any flow fails in the company namespace description: >- This flow shows how to send an alert to Sentry when a flow fails. The only required input is a DSN string value, which you can find when you go to your diff --git a/notify-about-github-stars-via-slack.yaml b/notify-about-github-stars-via-slack.yaml index b2381e8..290b04e 100644 --- a/notify-about-github-stars-via-slack.yaml +++ b/notify-about-github-stars-via-slack.yaml @@ -1,5 +1,5 @@ id: notify-about-github-stars-via-slack -namespace: blueprints +namespace: company.team inputs: - id: repo type: STRING @@ -27,7 +27,6 @@ extend: tags: - API - - Getting Started - Git - Outputs ee: false diff --git a/produce-to-rabbitmq.yaml b/produce-to-rabbitmq.yaml index 122e15d..4f06bd4 100644 --- a/produce-to-rabbitmq.yaml +++ b/produce-to-rabbitmq.yaml @@ -30,7 +30,7 @@ extend: id: read_orders - namespace: blueprint + namespace: company.team tasks: diff --git a/send-email-with-attachment.yaml b/send-email-with-attachment.yaml index 6139333..47d6e49 100644 --- a/send-email-with-attachment.yaml +++ b/send-email-with-attachment.yaml @@ -1,5 +1,5 @@ id: send-email-with-attachment -namespace: blueprint +namespace: company.team tasks: - id: dataset1 type: io.kestra.plugin.core.http.Download diff --git a/weaviate-load-and-query.yaml b/weaviate-load-and-query.yaml index 16cd288..9c61fb2 100644 --- a/weaviate-load-and-query.yaml +++ b/weaviate-load-and-query.yaml @@ -97,8 +97,7 @@ extend: The last task performing a [Generative Search](https://weaviate.io/developers/weaviate/starter-guides/generative#what-is-generative-search) is currently disabled, as it requires an OpenAI API key and following the - [Weaviate getting started - documentation](https://weaviate.io/developers/weaviate/quickstart). You can + [Weaviate documentation](https://weaviate.io/developers/weaviate/quickstart). You can enable it by removing the `disabled: true` line. tags: - AI diff --git a/zenduty-failure-alert.yaml b/zenduty-failure-alert.yaml index d4a02e3..3c56735 100644 --- a/zenduty-failure-alert.yaml +++ b/zenduty-failure-alert.yaml @@ -1,5 +1,5 @@ id: zenduty-failure-alert -namespace: prod.monitoring +namespace: system tasks: - id: send_alert type: io.kestra.plugin.notifications.zenduty.ZendutyExecution @@ -17,10 +17,10 @@ triggers: - FAILED - WARNING - type: io.kestra.plugin.core.condition.ExecutionNamespaceCondition - namespace: prod + namespace: company prefix: true extend: - title: Send an alert to Zenduty when any flow fails in the production namespace + title: Send an alert to Zenduty when any flow fails in the company namespace description: >- This flow sends an alert to Zenduty when a production flow fails. The only required input is an integration key string value. Check the [Zenduty