kestra-io · Ben8t · Jan 23, 2025 · Feb 11, 2025
diff --git a/src/test/resources/sanity-checks/dbt_cli.yaml b/src/test/resources/sanity-checks/dbt_cli.yaml
@@ -0,0 +1,62 @@
+id: dbt  # flow identifier
+namespace: sanitychecks
+
+tasks:
+  - id: dbt
+    type: io.kestra.plugin.core.flow.WorkingDirectory  # parent of the child tasks below; presumably so they share files - confirm in Kestra docs
+    tasks:
+      - id: clone_repository  # clones the main branch of kestra-io/dbt-demo
+        type: io.kestra.plugin.git.Clone
+        url: https://github.com/kestra-io/dbt-demo
+        branch: main
+
+      - id: dbt_build  # runs `dbt deps` then `dbt build` with the Docker task runner
+        type: io.kestra.plugin.dbt.cli.DbtCLI
+        taskRunner:
+          type: io.kestra.plugin.scripts.runner.docker.Docker
+        containerImage: ghcr.io/kestra-io/dbt-duckdb:latest
+        commands:
+          - dbt deps
+          - dbt build
+        profiles: |  # inline dbt profile; `dev` target writes to the DuckDB file dbt.duckdb
+          jaffle_shop:
+            outputs:
+              dev:
+                type: duckdb
+                path: dbt.duckdb
+                extensions:
+                  - parquet
+                fixed_retries: 1
+                threads: 16
+                timeout_seconds: 300
+            target: dev
+
+      - id: python  # exports tables from dbt.duckdb to CSV files
+        type: io.kestra.plugin.scripts.python.Script
+        outputFiles:  # matches the <table_name>.csv files written by the script
+          - "*.csv"
+        taskRunner:
+          type: io.kestra.plugin.scripts.runner.docker.Docker
+        containerImage: ghcr.io/kestra-io/duckdb:latest
+        script: |
+          import duckdb
+          import pandas as pd
+
+          conn = duckdb.connect(database='dbt.duckdb', read_only=False)
+
+          tables_query = "SELECT table_name FROM information_schema.tables WHERE table_schema = 'main';"
+
+          tables = conn.execute(tables_query).fetchall()
+
+          # Export each table in the 'main' schema to CSV, excluding tables
+          # whose names start with 'raw' or 'stg'.
+
+          for table_name in tables:
+              table_name = table_name[0]  # fetchall() yields row tuples; take the name column
+              # Skip tables with names starting with 'raw' or 'stg'
+              if not table_name.startswith(('raw', 'stg')):
+                  query = f"SELECT * FROM {table_name}"
+                  df = conn.execute(query).fetchdf()
+                  df.to_csv(f"{table_name}.csv", index=False)
+
+          conn.close()