diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b8af22a..a908a1b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -10,8 +10,13 @@ jobs: runs-on: ubuntu-latest steps: - name: checkout repo content - uses: actions/checkout@v2 - + uses: actions/checkout@v3 + with: + submodules: recursive + - name: Git Submodule Update + run: | + git submodule update --remote --recursive + - name: setup python uses: actions/setup-python@v4 with: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d0dc258..25f2611 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -10,7 +10,13 @@ jobs: runs-on: ubuntu-latest steps: - name: checkout repo content - uses: actions/checkout@v2 # checkout the repository content + uses: actions/checkout@v3 + with: + submodules: recursive + + - name: Git Submodule Update + run: | + git submodule update --remote --recursive - name: setup python uses: actions/setup-python@v4 diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..d4343b3 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "artifact_builder"] + path = artifact_builder + url = git@github.com:viadb/artifact_builder.git diff --git a/artifact_builder b/artifact_builder new file mode 160000 index 0000000..14fa779 --- /dev/null +++ b/artifact_builder @@ -0,0 +1 @@ +Subproject commit 14fa7792491950bf8ac832763006e9915ecc1dba diff --git a/artifact_builder/.gitignore b/artifact_builder/.gitignore deleted file mode 100644 index b0f2192..0000000 --- a/artifact_builder/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -__pycache__ -.venv \ No newline at end of file diff --git a/artifact_builder/Dockerfile b/artifact_builder/Dockerfile deleted file mode 100644 index f133a32..0000000 --- a/artifact_builder/Dockerfile +++ /dev/null @@ -1,6 +0,0 @@ -FROM python:latest - -WORKDIR /usr/app/src -COPY ../. ./ - -CMD [ "python3", "artifact_builder/", "-D", "sql"] \ No newline at end of file diff --git a/artifact_builder/README.md b/artifact_builder/README.md deleted file mode 100644 index e8c1627..0000000 --- a/artifact_builder/README.md +++ /dev/null @@ -1,28 +0,0 @@ -# Artifact Builder - -This section builds a JSON map of the SQL and MD files present on the repository, and structure them in the form of a Opensearh-compatible schema for later import into a searcher endpoint. - -## Execution - -Load virtual env: - -```bash -python3 -m venv venv -source venv/bin/activate -pip3 install -r requirements.txt -``` - -The folowing allows you to index and search: - -```bash -python3 artifact_builder/ -D sql -E -v | jq '.search_query' -``` - -See `python3 artifact_builder/ -h` for more information. - -Default output: index.json (with `--output`) and `--ouput-ndjson` (`ndindex.json`). - - -## TODO - -Modules [potentially] beneficial: [Dataclasses](https://github.com/lidatong/dataclasses-json) diff --git a/artifact_builder/__init__.py b/artifact_builder/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/artifact_builder/__main__.py b/artifact_builder/__main__.py deleted file mode 100644 index 7a89c88..0000000 --- a/artifact_builder/__main__.py +++ /dev/null @@ -1,53 +0,0 @@ -import indexer -from filemap import fileMap -import json -import ndjson -import argparse -from re import sub, escape, MULTILINE -from os import remove - -sqlDirectory = "../sql/" -global _engine -_engine: str - -def main(): - - parser = argparse.ArgumentParser() - parser.add_argument("-D", "--directory", help="Directory to be indexed", type=str, required=False, default=sqlDirectory) - parser.add_argument("-O", "--output", help="Output json artifact", required=False, type=str, default="./index.json") - parser.add_argument("-o", "--output-ndjson", help="Output ndjson artifact", required=False, type=str, default="./ndindex.json") - parser.add_argument("-v", "--verbose", help="Kind of debug", default=False, action=argparse.BooleanOptionalAction) - parser.add_argument("-E", "--engine", help="Engine name", default="postgres", type=str, required=True) - args = parser.parse_args() - - fileMap = indexer.indexDir(args.directory, args.engine) - - if args.verbose: - print(json.dumps(fileMap, indent=1)) - - with open(args.output, 'w+', encoding='utf-8-sig') as f: - json.dump(fileMap, f, indent=1) - - with open(args.output, 'rb') as f: - data = json.load(f) - - """ - Next blocks are doing a nasty thing. They dump into a temporal file the contents of - the data dictionary for escaping the escape character later. This generates an ndjson - compatible with Postgres COPY. - """ - with open(args.output_ndjson + '.temp', 'w+', encoding='utf-8-sig') as f: - writer = ndjson.writer(f) - for key in data: - writer.writerow(data[key]) - - with open(args.output_ndjson, 'w+', encoding='utf-8-sig') as f: - input = open(args.output_ndjson + '.temp') - f.write(sub(r'\\',r'\\\\',input.read())) - input.close() - - remove(args.output_ndjson + '.temp') - -if __name__ == "__main__": - main() - \ No newline at end of file diff --git a/artifact_builder/filemap.py b/artifact_builder/filemap.py deleted file mode 100644 index 7ef511d..0000000 --- a/artifact_builder/filemap.py +++ /dev/null @@ -1,23 +0,0 @@ -from typing import TypedDict, List - -# https://peps.python.org/pep-0589/ - -""" -Class fileMap -This class is intended to set the fields present in the json artifact -for later indexing by the engine. -""" -class fileMap(TypedDict): - engine: str - # Title is either the filename without scores or the title of the .md - title: str - fpath: str - docFPath: str - category: str - query: str - doc: str - versionSupport: List[int] - outputPerVersion: List[str] - -def init(): - pass \ No newline at end of file diff --git a/artifact_builder/indexer.py b/artifact_builder/indexer.py deleted file mode 100644 index 0c3800c..0000000 --- a/artifact_builder/indexer.py +++ /dev/null @@ -1,37 +0,0 @@ -import os -from os.path import join -from re import sub -from filemap import fileMap - -""" -indexDir -This function iterates over the giving directory and constructs the -dictionary of files with .sql and .md extension. -""" -def indexDir(sqlDirectory: str, _engine: str) -> fileMap: - _fileMap: fileMap = {} - - for root, dirs, files in os.walk(sqlDirectory): - for filename in files: - key = sub('.sql|.md', '', filename) - fpath = join(root.removeprefix('../'), filename) - - # For now, we ignore READMEs. But, we might furtherly include some documentation - # artifact. - if key not in _fileMap and filename.removesuffix(".md").lower() not in ('readme', '.gitkeep'): - _fileMap[key]={'engine': _engine} - _fileMap[key]={'title': sub('[_-]'," ", str(key)).capitalize()} - - if filename.endswith(".sql"): - with open(fpath, encoding="utf-8") as f: - _fileMap[key].update({'fpath': fpath, - 'category': root.removeprefix(sqlDirectory + '/'), - 'query': f.read()}) - elif filename.endswith(".md") and filename.removesuffix(".md").lower() not in ('readme', '.gitkeep'): - with open(fpath, encoding="utf-8") as f: - _fileMap[key].update({'docFPath': fpath, - 'doc': f.read()}) - else: - pass - - return _fileMap \ No newline at end of file diff --git a/artifact_builder/requirements.txt b/artifact_builder/requirements.txt deleted file mode 100644 index 1040777..0000000 --- a/artifact_builder/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -ndjson==0.3.1 \ No newline at end of file