kestra-io · anna-geller · Nov 6, 2024 · Nov 6, 2024
diff --git a/dwh-and-analytics-1.yaml b/dwh-and-analytics-1.yaml
diff --git a/dwh-and-analytics.yaml b/dwh-and-analytics.yaml
@@ -1,28 +1,24 @@
 id: dwh-and-analytics
-namespace: company.team
-description: >
-  ## Data Platform
-
-  Clone a [Git repository](https://github.com/kestra-io/dbt-example) and build
-  dbt models
+namespace: tutorial
+description: Data Warehouse and Analytics
 tasks:
   - id: dbt
     type: io.kestra.plugin.core.flow.WorkingDirectory
     tasks:
       - id: clone_repository
         type: io.kestra.plugin.git.Clone
-        url: https://github.com/kestra-io/dbt-example
-        branch: master
+        url: https://github.com/kestra-io/dbt-demo
+        branch: main
       - id: dbt_build
         type: io.kestra.plugin.dbt.cli.DbtCLI
         taskRunner:
           type: io.kestra.plugin.scripts.runner.docker.Docker
         containerImage: ghcr.io/kestra-io/dbt-duckdb:latest
         commands:
-          - dbt deps --project-dir dbt
-          - dbt build --project-dir dbt
+          - dbt deps
+          - dbt build
         profiles: |
-          my_dbt_project:
+          jaffle_shop:
             outputs:
               dev:
                 type: duckdb
@@ -32,35 +28,66 @@ tasks:
                 fixed_retries: 1
                 threads: 16
                 timeout_seconds: 300
-            target: dev
-extend:
-  title: Git workflow for dbt — pull latest dbt changes from GitHub and build dbt
-    models from your dbt project directory
-  description: >
-    This flow does the following:
+            target: dev
+      - id: python
+        type: io.kestra.plugin.scripts.python.Script
+        outputFiles:
+          - "*.csv"
+        taskRunner:
+          type: io.kestra.plugin.scripts.runner.docker.Docker
+        containerImage: ghcr.io/kestra-io/duckdb:latest
+        script: >
+          import duckdb
 
-    1. Clones the latest dbt code from
-    [GitHub](https://github.com/kestra-io/dbt-example/tree/master)
+          import pandas as pd
 
-    2. Pulls a public [container image with the latest package
-    dependencies](https://github.com/kestra-io/examples/pkgs/container/dbt-duckdb)
 
-    3. Runs dbt CLI commands in a Docker container.
+          conn = duckdb.connect(database='dbt.duckdb', read_only=False)
 
 
-    Note how the dbt CLI commands use the `--project-dir dbt` flag. This is
-    important because the [example GitHub project we
-    use](https://github.com/kestra-io/dbt-example/tree/master) has the dbt
-    project code located in a dedicated folder. 
+          tables_query = "SELECT table_name FROM information_schema.tables WHERE
+          table_schema = 'main';"
 
+          tables = conn.execute(tables_query).fetchall()
 
-    If your dbt project is located outside of the root directory in your Git
-    repository structure, make sure to use the `--project-dir dbt` flag. 
+
+          # Export each table to CSV, excluding tables that start with 'raw' or
+          'stg'
+
+          for table_name in tables:
+              table_name = table_name[0]
+              # Skip tables with names starting with 'raw' or 'stg'
+              if not table_name.startswith('raw') and not table_name.startswith('stg'):
+                  query = f"SELECT * FROM {table_name}"
+                  df = conn.execute(query).fetchdf()
+                  df.to_csv(f"{table_name}.csv", index=False)
+
+          conn.close()
+extend:
+  title: Getting started with Kestra — a Data Warehouse and Analytics workflow example
+  description: >-
+    This flow is a simple example of a data warehouse and analytics use case. It
+    clones a dbt repository, builds the dbt project, and exports the data to CSV
+    files.
+
+
+    The flow has three tasks:
+
+    1. The first task clones a dbt repository.
+
+    2. The second task builds the dbt models and tests using DuckDB.
+
+    3. The third task exports the transformed data to CSV files.
   tags:
-    - Git
+    - Getting Started
+    - Data
     - dbt
     - DuckDB
-  ee: false
+    - Git
+    - Python
+    - Docker
+  ee: true
   demo: true
-  meta_description: This flow clones a dbt Git repository from GitHub and then
-    runs dbt CLI commands in a Docker container.
+  meta_description: This flow is a simple example of a data warehouse and
+    analytics use case. It clones a dbt repository, builds the dbt project, and
+    exports the data to CSV files.
diff --git a/failure-alert-sentry.yaml b/failure-alert-sentry.yaml
@@ -1,5 +1,5 @@
 id: failure-alert-sentry
-namespace: prod.monitoring
+namespace: system
 tasks:
   - id: send_alert
     type: io.kestra.plugin.notifications.sentry.SentryExecution
@@ -16,10 +16,10 @@ triggers:
           - FAILED
           - WARNING
       - type: io.kestra.plugin.core.condition.ExecutionNamespaceCondition
-        namespace: prod
+        namespace: company
         prefix: true
 extend:
-  title: Send an alert to Sentry when any flow fails in the production namespace
+  title: Send an alert to Sentry when any flow fails in the company namespace
   description: >-
     This flow shows how to send an alert to Sentry when a flow fails. The only
     required input is a DSN string value, which you can find when you go to your

diff --git a/notify-about-github-stars-via-slack.yaml b/notify-about-github-stars-via-slack.yaml
@@ -1,5 +1,5 @@
 id: notify-about-github-stars-via-slack
-namespace: blueprints
+namespace: company.team
 inputs:
   - id: repo
     type: STRING
@@ -27,7 +27,6 @@ extend:
 
   tags:
     - API
-    - Getting Started
     - Git
     - Outputs
   ee: false

diff --git a/produce-to-rabbitmq.yaml b/produce-to-rabbitmq.yaml
@@ -30,7 +30,7 @@ extend:
 
     id: read_orders
 
-    namespace: blueprint
+    namespace: company.team
 
 
     tasks:

diff --git a/send-email-with-attachment.yaml b/send-email-with-attachment.yaml
@@ -1,5 +1,5 @@
 id: send-email-with-attachment
-namespace: blueprint
+namespace: company.team
 tasks:
   - id: dataset1
     type: io.kestra.plugin.core.http.Download

diff --git a/weaviate-load-and-query.yaml b/weaviate-load-and-query.yaml
@@ -97,8 +97,7 @@ extend:
     The last task performing a [Generative
     Search](https://weaviate.io/developers/weaviate/starter-guides/generative#what-is-generative-search)
     is currently disabled, as it requires an OpenAI API key and following the
-    [Weaviate getting started
-    documentation](https://weaviate.io/developers/weaviate/quickstart). You can
+    [Weaviate documentation](https://weaviate.io/developers/weaviate/quickstart). You can
     enable it by removing the `disabled: true` line.
   tags:
     - AI

diff --git a/zenduty-failure-alert.yaml b/zenduty-failure-alert.yaml
@@ -1,5 +1,5 @@
 id: zenduty-failure-alert
-namespace: prod.monitoring
+namespace: system
 tasks:
   - id: send_alert
     type: io.kestra.plugin.notifications.zenduty.ZendutyExecution
@@ -17,10 +17,10 @@ triggers:
           - FAILED
           - WARNING
       - type: io.kestra.plugin.core.condition.ExecutionNamespaceCondition
-        namespace: prod
+        namespace: company
         prefix: true
 extend:
-  title: Send an alert to Zenduty when any flow fails in the production namespace
+  title: Send an alert to Zenduty when any flow fails in the company namespace
   description: >-
-    This flow sends an alert to Zenduty when a production flow fails. The only
+    This flow sends an alert to Zenduty when a flow in the company namespace fails. The only
     required input is an integration key string value. Check the [Zenduty
-Original file line number
+Diff line change
@@ Expand Up / @@ -30,7 +30,7 @@ extend: @@
         id: read_orders
-        namespace: blueprint
+        namespace: company.team
         tasks:
@@ Expand Down @@