From a8bdcafd74fe16844e4d9880187cce09c57eb3b4 Mon Sep 17 00:00:00 2001 From: neil Date: Fri, 11 Oct 2024 09:11:51 +0000 Subject: [PATCH 01/10] chore(tooling): swap out dbviz custom database diagram tool for SchemaSpy, remove dependencies in catalyst-ci postgresql base --- earthly/postgresql/Earthfile | 50 +++- earthly/postgresql/scripts/std_docs.py | 361 +++---------------------- earthly/postgresql/templates/schema.md | 64 +++++ 3 files changed, 138 insertions(+), 337 deletions(-) create mode 100644 earthly/postgresql/templates/schema.md diff --git a/earthly/postgresql/Earthfile b/earthly/postgresql/Earthfile index 37f75b076..4d45cfab0 100644 --- a/earthly/postgresql/Earthfile +++ b/earthly/postgresql/Earthfile @@ -2,10 +2,9 @@ VERSION 0.8 IMPORT ../rust/tools AS rust-tools -IMPORT ../../utilities/dbviz AS dbviz IMPORT ../../utilities/scripts AS scripts -# cspell: words psycopg dbviz +# cspell: words psycopg postgres-base: FROM postgres:16.4-bookworm @@ -43,10 +42,6 @@ postgres-base: # Get refinery COPY rust-tools+tool-refinery/refinery /bin - # Get dbviz - COPY dbviz+build/dbviz /bin - RUN dbviz --help - # Copy our set SQL files COPY --dir sql /sql @@ -56,7 +51,11 @@ postgres-base: DO scripts+ADD_BASH_SCRIPTS DO scripts+ADD_PYTHON_SCRIPTS + # Copy templates to the working directory + COPY --dir templates /templates + SAVE ARTIFACT /scripts /scripts + SAVE ARTIFACT /templates /templates # Common build setup steps. # Arguments: @@ -82,8 +81,13 @@ BUILDER: # DOCS - FUNCTION to build the docs, needs to be run INSIDE the BUILDER like so: # -# 1. Create a ./docs/diagrams.json which has the options needed to run to generate the docs to /docs -# 2. Define the following targets in your earthfile +# This function uses SchemaSpy to generate database documentation. +# SchemaSpy creates detailed, browsable ER diagrams and schema documentation. +# +# To use this function: +# 1. Ensure your migrations are in the ./migrations directory +# 2. Have a refinery.toml file to configure the migrations +# 3. Define the following targets in your earthfile: # # builder: # DO github.com/input-output-hk/catalyst-ci/earthly/postgresql:+BUILDER --sqlfluff_cfg=./../../+repo-config/repo/.sqlfluff @@ -93,25 +97,46 @@ BUILDER: # # DO github.com/input-output-hk/catalyst-ci/earthly/postgresql:+BUILD --image_name= # DO github.com/input-output-hk/catalyst-ci/earthly/postgresql:+DOCS +# +# The generated documentation will be saved in the ./docs artifact. DOCS: FUNCTION - ARG diagrams=./diagrams.json ARG migrations=./migrations ARG refinery_toml=./refinery.toml + FROM +docs-base + USER postgres:postgres WORKDIR /docs - COPY $diagrams ./diagrams.json COPY --dir $migrations . COPY --dir $refinery_toml . - RUN /scripts/std_docs.py ./diagrams.json + RUN /scripts/std_docs.py + # Pull templates artifact from postgres-base + COPY +postgres-base/templates/schema.md ./docs/schema.md - SAVE ARTIFACT docs /docs + SAVE ARTIFACT docs ./docs +docs-base: + FROM +postgres-base + # Define ARGs for versions specific to docs + ARG SCHEMASPY_VERSION=6.2.4 + ARG POSTGRESQL_JDBC_VERSION=42.7.4 + ARG OPENJDK_VERSION=21 + + WORKDIR / + + # Setup SchemaSpy requirements, commands combined to reduce layers + RUN echo "deb http://ftp.de.debian.org/debian sid main" > /etc/apt/sources.list && \ + apt-get update && \ + apt-get install -y \ + openjdk-${OPENJDK_VERSION}-jre-headless && \ + wget -O /bin/schemaspy.jar https://github.com/schemaspy/schemaspy/releases/download/v${SCHEMASPY_VERSION}/schemaspy-${SCHEMASPY_VERSION}.jar && \ + wget -O /bin/postgresql.jar https://jdbc.postgresql.org/download/postgresql-${POSTGRESQL_JDBC_VERSION}.jar && \ + apt-get clean && rm -rf /var/lib/apt/lists/* # Linter checks for sql files CHECK: @@ -171,3 +196,4 @@ BUILD: # Push the container... SAVE IMAGE ${image_name}:latest + diff --git a/earthly/postgresql/scripts/std_docs.py b/earthly/postgresql/scripts/std_docs.py index 09789a49c..b5dbc8ea7 100755 --- a/earthly/postgresql/scripts/std_docs.py +++ b/earthly/postgresql/scripts/std_docs.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# cspell: words dbmigrations dbviz dbhost dbuser dbuserpw Tsvg +# cspell: words dbmigrations dbhost dbuser dbuserpw Tsvg from typing import Optional import python.exec_manager as exec_manager @@ -10,87 +10,14 @@ from rich import print import os import re -import json -from dataclasses import dataclass from textwrap import indent - -@dataclass -class DiagramCfg: - title: str - version: int - migration_name: str - tables: Optional[list[str]] - included_tables: Optional[list[str]] - excluded_tables: Optional[list[str]] - comments: Optional[bool] - column_description_wrap: Optional[int] - table_description_wrap: Optional[int] - sql_data: str - - def include( - self, - extra_includes: Optional[list[str]] = None, - extra_excludes: Optional[list[str]] = None, - ) -> Optional[list[str]]: - # We exclude from the global include tables, any tables the migration - # itself requests to be excluded. - - include_tables = self.included_tables if self.included_tables else [] - tables = self.tables if self.tables else [] - extra_includes = extra_includes if extra_includes else [] - excluded_tables = self.excluded_tables if self.excluded_tables else [] - extra_excludes = extra_excludes if extra_excludes else [] - - for table in tables + extra_includes: - if ( - table not in excluded_tables - and table not in extra_excludes - and table not in include_tables - ): - include_tables.append(table) - - if len(include_tables) == 0: - include_tables = None - - return include_tables - - def exclude( - self, extra_excludes: Optional[list[str]] = None - ) -> Optional[list[str]]: - # We exclude from the global exclude tables, any tables the migration - # specifically includes. - exclude_tables = self.excluded_tables if self.excluded_tables else [] - extra_excludes = extra_excludes if extra_excludes else [] - for table in extra_excludes: - if table not in exclude_tables: - exclude_tables.append(table) - - if len(exclude_tables) == 0: - exclude_tables = None - - return exclude_tables - - def process_sql_files(directory): file_pattern = r"V(\d+)__(\w+)\.sql" - table_pattern = r"CREATE TABLE(?: IF NOT EXISTS)? (\w+)" - - diagram_option_pattern = r"^--\s*(Title|Include|Exclude|Comment|Column Description Wrap|Table Description Wrap)\s+:\s*(.*)$" - migrations = {} largest_version = 0 for filename in os.listdir(directory): - clean_sql = "" - title = None - table_names = [] - included_tables = None - excluded_tables = None - comments = None - column_description_wrap = None - table_description_wrap = None - match = re.match(file_pattern, filename) if match: version = int(match.group(1)) @@ -101,61 +28,14 @@ def process_sql_files(directory): with open(os.path.join(directory, filename), "r") as file: sql_data = file.read() - for line in sql_data.splitlines(): - match = re.match(diagram_option_pattern, line) - if match: - if match.group(1).lower() == "title" and title is None: - title = match.group(2) - elif ( - match.group(1).lower() == "include" - and len(match.group(2)) > 0 - ): - if included_tables is None: - included_tables = [] - included_tables.append(match.group(2).split()) - elif ( - match.group(1).lower() == "exclude" - and len(match.group(2)) > 0 - ): - if excluded_tables is None: - excluded_tables = [] - excluded_tables.append(match.group(2).split()) - elif match.group(1).lower() == "comment": - if match.group(2).strip().lower() == "true": - comments = True - elif match.group(1).lower() == "column description wrap": - try: - column_description_wrap = int(match.group(2)) - except: - pass - elif match.group(1).lower() == "table description wrap": - try: - table_description_wrap = int(match.group(2)) - except: - pass - else: - # We strip diagram options from the SQL. - clean_sql += line + "\n" - - match = re.match(table_pattern, line) - if match: - table_names.append(match.group(1)) - - migrations[version] = DiagramCfg( - title, - version, - migration_name, - table_names, - included_tables, - excluded_tables, - comments, - column_description_wrap, - table_description_wrap, - clean_sql, - ) - return migrations, largest_version + migrations[version] = { + "version": version, + "migration_name": migration_name, + "sql_data": sql_data + } + return migrations, largest_version class Migrations: def __init__(self, args: argparse.Namespace): @@ -169,184 +49,8 @@ def __init__(self, args: argparse.Namespace): None """ self.args = args - - with open(args.diagram_config) as f: - self.config = json.load(f) - self.migrations, self.migration_version = process_sql_files(args.dbmigrations) - def schema_name(self) -> str: - return self.config.get("name", "Database Schema") - - def all_schema_comments(self) -> bool: - return self.config.get("all_schema", {}).get("comments", False) - - def full_schema_comments(self) -> bool: - return self.config.get("full_schema", {}).get( - "comments", self.all_schema_comments() - ) - - def all_schema_included_tables(self) -> list[str]: - return self.config.get("all_schema", {}).get("included_tables", []) - - def all_schema_excluded_tables(self) -> list[str]: - return self.config.get("all_schema", {}).get("excluded_tables", []) - - def full_schema_excluded_tables(self) -> list[str]: - return self.config.get("full_schema", {}).get( - "excluded_tables", self.all_schema_excluded_tables() - ) - - def all_schema_column_description_wrap(self) -> int: - return self.config.get("all_schema", {}).get("column_description_wrap", 50) - - def full_schema_column_description_wrap(self) -> int: - return self.config.get("full_schema", {}).get( - "column_description_wrap", self.all_schema_column_description_wrap() - ) - - def all_schema_table_description_wrap(self) -> int: - return self.config.get("all_schema", {}).get("table_description_wrap", 50) - - def full_schema_table_description_wrap(self) -> int: - return self.config.get("full_schema", {}).get( - "table_description_wrap", self.all_schema_table_description_wrap() - ) - - def dbviz( - self, - filename: str, - name: str, - title: str, - included_tables: Optional[list[str]] = None, - excluded_tables: Optional[list[str]] = None, - comments: Optional[bool] = None, - column_description_wrap: Optional[int] = None, - table_description_wrap: Optional[int] = None, - ) -> exec_manager.Result: - if len(title) > 0: - title = f' --title "{title}"' - - includes = "" - if included_tables: - for table in included_tables: - includes += f" -i {table}" - - excludes = "" - if excluded_tables: - for table in excluded_tables: - excludes += f" -e {table}" - - if comments: - comments = " --comments" - else: - comments = "" - - if column_description_wrap and column_description_wrap > 0: - column_description_wrap = ( - f" --column-description-wrap {column_description_wrap}" - ) - else: - column_description_wrap = "" - - if table_description_wrap and table_description_wrap > 0: - table_description_wrap = ( - f" --table-description-wrap {table_description_wrap}" - ) - else: - table_description_wrap = "" - - res = exec_manager.cli_run( - f"dbviz -d {self.args.dbname}" - + f" -h {self.args.dbhost}" - + f" -u {self.args.dbuser}" - + f" -p {self.args.dbuserpw}" - + f"{title}" - + f"{includes}" - + f"{excludes}" - + f"{comments}" - + f"{column_description_wrap}" - + f"{table_description_wrap}" - + f" | dot -Tsvg -o {filename}", - # + f" > {filename}.dot", - name=f"Generate Schema Diagram: {name}", - verbose=True, - ) - - # if res.ok: - # exec_manager.cli_run( - # f"dot -Tsvg {filename}.dot -o {filename}", - # name=f"Render Schema Diagram to SVG: {name}", - # verbose=True, - # ) - - return res - - def full_schema_diagram(self) -> exec_manager.Result: - # Create a full Schema Diagram. - return self.dbviz( - "docs/full-schema.svg", - "Full Schema", - self.schema_name(), - excluded_tables=self.full_schema_excluded_tables(), - comments=self.full_schema_comments(), - column_description_wrap=self.full_schema_column_description_wrap(), - table_description_wrap=self.full_schema_table_description_wrap(), - ) - - def migration_schema_diagram(self, ver: int) -> exec_manager.Result: - # Create a schema diagram for an individual migration. - if ver in self.migrations: - migration = self.migrations[ver] - - include_tables = migration.include( - self.all_schema_included_tables(), self.all_schema_excluded_tables() - ) - if include_tables is None: - return exec_manager.Result( - 0, - "", - "", - 0.0, - f"Migration {ver} has no tables to diagram.", - ) - - exclude_tables = migration.exclude(self.all_schema_excluded_tables()) - - title = f"{migration.migration_name}" - if migration.title and len(migration.title) > 0: - title = migration.title - - comments = None - if migration.comments is not None: - comments = migration.comments - else: - comments = self.all_schema_comments() - - return self.dbviz( - f"docs/migration-{ver}.svg", - f"V{ver}__{migration.migration_name}", - title, - included_tables=include_tables, - excluded_tables=exclude_tables, - comments=comments, - column_description_wrap=migration.column_description_wrap, - table_description_wrap=migration.table_description_wrap, - ) - - def create_diagrams(self, results: exec_manager.Results) -> exec_manager.Results: - # Create a full Schema Diagram first. - res = self.full_schema_diagram() - results.add(res) - - for ver in sorted(self.migrations.keys()): - res = self.migration_schema_diagram(ver) - results.add(res) - - # exec_manager.cli_run("ls -al docs", verbose=True) - - return results - def create_markdown_file(self, file_path): with open(file_path, "w") as markdown_file: # Write the title with the maximum migration version @@ -354,26 +58,13 @@ def create_markdown_file(self, file_path): "# Migrations (Version {}) \n\n".format(self.migration_version) ) - # Link the full schema diagram. - markdown_file.write('??? example "Full Schema Diagram"\n\n') - markdown_file.write( - ' ![Full Schema](./full-schema.svg "Full Schema")\n\n' - ) - # Write the contents of each file in order for version in sorted(self.migrations.keys()): migration = self.migrations[version] - sql_data = migration.sql_data.strip() + sql_data = migration["sql_data"].strip() # Write the title of the file - markdown_file.write(f"## {migration.migration_name}\n\n") - - if os.path.exists(f"docs/migration-{version}.svg"): - markdown_file.write('??? example "Schema Diagram"\n\n') - markdown_file.write( - f" ![Migration {migration.migration_name}]" - + f'(./migration-{version}.svg "{migration.migration_name}")\n\n' - ) + markdown_file.write(f"## {migration['migration_name']}\n\n") markdown_file.write('??? abstract "Schema Definition"\n\n') markdown_file.write( @@ -382,7 +73,6 @@ def create_markdown_file(self, file_path): print("Markdown file created successfully at: {}".format(file_path)) - def main(): # Force color output in CI rich.reconfigure(color_system="256") @@ -390,7 +80,6 @@ def main(): parser = argparse.ArgumentParser( description="Standard Postgresql Documentation Processing." ) - parser.add_argument("diagram_config", help="Diagram Configuration JSON") parser.add_argument("--verbose", action="store_true", help="Enable verbose output") db_ops.add_args(parser) @@ -418,21 +107,43 @@ def main(): results.add(res) if res.ok(): - exec_manager.cli_run("mkdir docs") # Where we build the docs. + # Create the docs directory + exec_manager.cli_run("mkdir -p docs") # Where we build the docs. # Get all info about the migrations. migrations = Migrations(args) - results = migrations.create_diagrams(results) if results.ok(): + schemaspy_cmd = ( + f"java -jar /bin/schemaspy.jar -t pgsql11 " + f"-dp /bin/postgresql.jar " + f"-db {args.dbname} " + f"-host {args.dbhost} " + f"-u {args.dbuser} " + f"-p {args.dbuserpw} " + f"-o docs/database_schema/ " + ) + res = exec_manager.cli_run( + schemaspy_cmd, + name="Generate SchemaSpy Documentation", + verbose=True + ) + results.add(res) + + # If SchemaSpy command completes without error, create .pages file to hide the schema folder + if res.ok(): + exec_manager.cli_run( + 'echo "hide: true" > docs/database_schema/.pages', + name="Create .pages file", + verbose=True + ) + migrations.create_markdown_file("docs/migrations.md") - # exec_manager.cli_run("cat /tmp/migrations.md", verbose=True) results.print() if not results.ok(): exit(1) - if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/earthly/postgresql/templates/schema.md b/earthly/postgresql/templates/schema.md new file mode 100644 index 000000000..bb442edc6 --- /dev/null +++ b/earthly/postgresql/templates/schema.md @@ -0,0 +1,64 @@ +--- +icon: material/database +hide: + - navigation + - toc +--- + + + + + + + + + \ No newline at end of file From a4f234673cee4251873e0ccb525a0c5dd5de6e0a Mon Sep 17 00:00:00 2001 From: neil Date: Fri, 11 Oct 2024 09:12:25 +0000 Subject: [PATCH 02/10] chore(tooling): swap out dbviz custom database diagram tool for SchemaSpy, remove dependencies in catalyst-ci postgresql base --- earthly/postgresql/Earthfile | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/earthly/postgresql/Earthfile b/earthly/postgresql/Earthfile index 4d45cfab0..4c21c3eb9 100644 --- a/earthly/postgresql/Earthfile +++ b/earthly/postgresql/Earthfile @@ -128,15 +128,14 @@ docs-base: ARG OPENJDK_VERSION=21 WORKDIR / - - # Setup SchemaSpy requirements, commands combined to reduce layers + + # Setup SchemaSpy requirements RUN echo "deb http://ftp.de.debian.org/debian sid main" > /etc/apt/sources.list && \ apt-get update && \ - apt-get install -y \ - openjdk-${OPENJDK_VERSION}-jre-headless && \ + apt-get install -y openjdk-${OPENJDK_VERSION}-jre-headless && \ + apt-get clean && rm -rf /var/lib/apt/lists/* && \ wget -O /bin/schemaspy.jar https://github.com/schemaspy/schemaspy/releases/download/v${SCHEMASPY_VERSION}/schemaspy-${SCHEMASPY_VERSION}.jar && \ - wget -O /bin/postgresql.jar https://jdbc.postgresql.org/download/postgresql-${POSTGRESQL_JDBC_VERSION}.jar && \ - apt-get clean && rm -rf /var/lib/apt/lists/* + wget -O /bin/postgresql.jar https://jdbc.postgresql.org/download/postgresql-${POSTGRESQL_JDBC_VERSION}.jar # Linter checks for sql files CHECK: From 89da9ffa6d27ee7adb007c6d52bab4b3efe0bef7 Mon Sep 17 00:00:00 2001 From: neil Date: Fri, 11 Oct 2024 13:23:19 +0000 Subject: [PATCH 03/10] chore(tooling): refactored how the dependencies are downloaded and cached to avoid apt install executing every run (reduced network calls) --- earthly/postgresql/Earthfile | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/earthly/postgresql/Earthfile b/earthly/postgresql/Earthfile index 4c21c3eb9..e68af34ea 100644 --- a/earthly/postgresql/Earthfile +++ b/earthly/postgresql/Earthfile @@ -3,6 +3,7 @@ VERSION 0.8 IMPORT ../rust/tools AS rust-tools IMPORT ../../utilities/scripts AS scripts +IMPORT ../java AS java # cspell: words psycopg @@ -21,7 +22,7 @@ postgres-base: colordiff \ findutils \ fontconfig \ - fonts-liberation2 \ + fonts-liberation2 \ graphviz \ libssl-dev \ mold \ @@ -122,20 +123,28 @@ DOCS: docs-base: FROM +postgres-base - # Define ARGs for versions specific to docs - ARG SCHEMASPY_VERSION=6.2.4 - ARG POSTGRESQL_JDBC_VERSION=42.7.4 - ARG OPENJDK_VERSION=21 - WORKDIR / - # Setup SchemaSpy requirements - RUN echo "deb http://ftp.de.debian.org/debian sid main" > /etc/apt/sources.list && \ - apt-get update && \ - apt-get install -y openjdk-${OPENJDK_VERSION}-jre-headless && \ - apt-get clean && rm -rf /var/lib/apt/lists/* && \ - wget -O /bin/schemaspy.jar https://github.com/schemaspy/schemaspy/releases/download/v${SCHEMASPY_VERSION}/schemaspy-${SCHEMASPY_VERSION}.jar && \ - wget -O /bin/postgresql.jar https://jdbc.postgresql.org/download/postgresql-${POSTGRESQL_JDBC_VERSION}.jar + # Copy the JAR files from the SCHEMASPY_ARTIFACTS target + COPY +schemaspy-artifacts/schemaspy.jar /bin/schemaspy.jar + COPY +schemaspy-artifacts/postgresql.jar /bin/postgresql.jar + + # Use the cached java installation from the java Earthfile + DO java+COPY_DEPS + + RUN apt-get clean && rm -rf /var/lib/apt/lists/* + +# seperate target to download and cache schemaspy and postgresql jdbc jars, avoids nework calls on every run +schemaspy-artifacts: + FROM alpine:3.20.3 + ARG SCHEMASPY_VERSION=6.2.4 + ARG POSTGRESQL_JDBC_VERSION=42.7.4 + WORKDIR /build + RUN apk add --no-cache wget && \ + wget -O schemaspy.jar https://github.com/schemaspy/schemaspy/releases/download/v${SCHEMASPY_VERSION}/schemaspy-${SCHEMASPY_VERSION}.jar && \ + wget -O postgresql.jar https://jdbc.postgresql.org/download/postgresql-${POSTGRESQL_JDBC_VERSION}.jar + SAVE ARTIFACT schemaspy.jar /schemaspy.jar + SAVE ARTIFACT postgresql.jar /postgresql.jar # Linter checks for sql files CHECK: From 03ba2932648dc1506cef91bb2c4dbdf980dfc98e Mon Sep 17 00:00:00 2001 From: neil Date: Fri, 11 Oct 2024 13:23:38 +0000 Subject: [PATCH 04/10] chore(tooling): refactored how the dependencies are downloaded and cached to avoid apt install executing every run (reduced network calls) --- earthly/java/Earthfile | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 earthly/java/Earthfile diff --git a/earthly/java/Earthfile b/earthly/java/Earthfile new file mode 100644 index 000000000..4c0268fae --- /dev/null +++ b/earthly/java/Earthfile @@ -0,0 +1,17 @@ +VERSION 0.8 + +# Base image for Java used in other targets to avoid using apt update / get-installs +# Network operations are not cached: Earthly doesn’t cache network +# operations (like apt-get update or wget) because network resources can change over time. +java-base: + FROM openjdk:21-jdk-slim + + SAVE ARTIFACT /usr/local/openjdk-21 /java + +COPY_DEPS: + FUNCTION + COPY +java-base/java /usr/local/openjdk-21 + + # Set environment variables for Java + ENV JAVA_HOME=/usr/local/openjdk-21 + ENV PATH=$JAVA_HOME/bin:$PATH \ No newline at end of file From e0e2d6db81c6d1bc713bb6f116a6078e6fc76ad6 Mon Sep 17 00:00:00 2001 From: neil Date: Mon, 14 Oct 2024 14:52:26 +0000 Subject: [PATCH 05/10] chore(tooling): Fix markdown warnings breaking the CI build. No warnings required on md file as it is just a container for SchemaSpy --- earthly/postgresql/templates/schema.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/earthly/postgresql/templates/schema.md b/earthly/postgresql/templates/schema.md index bb442edc6..db40d5efd 100644 --- a/earthly/postgresql/templates/schema.md +++ b/earthly/postgresql/templates/schema.md @@ -4,7 +4,7 @@ hide: - navigation - toc --- - + -