From f7dbaa1334ac45f0ab402dc26997633294435efb Mon Sep 17 00:00:00 2001 From: alexau Date: Fri, 1 Dec 2023 21:50:17 +0800 Subject: [PATCH 01/36] Init commit, created the backbone structure for the test --- .env | 5 ++++ .pre-commit-config.yaml | 34 ++++++++++++++++++++++++ Makefile | 10 +++++++ consumer/README.md | 0 consumer/pyproject.toml | 14 ++++++++++ consumer/src/__init__.py | 0 consumer/tests/__init__.py | 0 database/assets/create_records_table.sql | 9 +++++++ database/dockerfile | 4 +++ docker-compose.yml | 18 +++++++++++++ producer/README.md | 0 producer/pyproject.toml | 14 ++++++++++ producer/src/__init__.py | 0 producer/tests/__init__.py | 0 14 files changed, 108 insertions(+) create mode 100644 .env create mode 100644 .pre-commit-config.yaml create mode 100644 Makefile create mode 100644 consumer/README.md create mode 100644 consumer/pyproject.toml create mode 100644 consumer/src/__init__.py create mode 100644 consumer/tests/__init__.py create mode 100644 database/assets/create_records_table.sql create mode 100644 database/dockerfile create mode 100644 docker-compose.yml create mode 100644 producer/README.md create mode 100644 producer/pyproject.toml create mode 100644 producer/src/__init__.py create mode 100644 producer/tests/__init__.py diff --git a/.env b/.env new file mode 100644 index 0000000..cb60bc0 --- /dev/null +++ b/.env @@ -0,0 +1,5 @@ +POSTGRES_VERSION=15.3-alpine3.17 +POSTGRES_PORT=5432 +POSTGRES_USER=postgres +POSTGRES_PASSWORD=postgres +POSTGRES_DATABASE=records \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..fcec73e --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,34 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace + - id: check-added-large-files + - id: check-json + - id: check-toml + - id: detect-aws-credentials + args: [--allow-missing-credentials] + - id: detect-private-key + - id: name-tests-test + - repo: https://github.com/psf/black + rev: 23.10.1 + hooks: + - id: black + - repo: local + hooks: + - id: mypy-producer + name: mypy-producer + entry: mypy producer/src + language: system + types: [python] + pass_filenames: false + always_run: true + - id: mypy-consumer + name: mypy-consumer + entry: mypy consumer/src + language: system + types: [python] + pass_filenames: false + always_run: true \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..d59c083 --- /dev/null +++ b/Makefile @@ -0,0 +1,10 @@ +build: + docker compose build +up: + docker compose up +up_d: + docker compose up -d +down: + docker compose down +stats: + docker stats --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.MemPerc}}\t{{.NetIO}}\t{{.BlockIO}}\t{{.PIDs}}" \ No newline at end of file diff --git a/consumer/README.md b/consumer/README.md new file mode 100644 index 0000000..e69de29 diff --git a/consumer/pyproject.toml b/consumer/pyproject.toml new file mode 100644 index 0000000..c5e47ea --- /dev/null +++ b/consumer/pyproject.toml @@ -0,0 +1,14 @@ +[tool.poetry] +name = "consumer" +version = "0.1.0" +description = "" +authors = ["alexau "] +readme = "README.md" + +[tool.poetry.dependencies] +python = "^3.11" + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/consumer/src/__init__.py b/consumer/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git 
a/consumer/tests/__init__.py b/consumer/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/database/assets/create_records_table.sql b/database/assets/create_records_table.sql new file mode 100644 index 0000000..e96c176 --- /dev/null +++ b/database/assets/create_records_table.sql @@ -0,0 +1,9 @@ +CREATE TABLE IF NOT EXISTS records ( + record_time TIMESTAMPTZ NOT NULL, + sensor_id CHAR(64) NOT NULL, + value DOUBLE PRECISION NOT NULL, + PRIMARY KEY(record_time, sensor_id) +); + +CREATE INDEX IF NOT EXISTS idx_records_record_time ON records USING BTREE (record_time); +CREATE INDEX IF NOT EXISTS idx_records_record_time ON records USING HASH (sensor_id); \ No newline at end of file diff --git a/database/dockerfile b/database/dockerfile new file mode 100644 index 0000000..3eeee05 --- /dev/null +++ b/database/dockerfile @@ -0,0 +1,4 @@ +ARG POSTGRES_VERSION +FROM docker.io/postgres:${POSTGRES_VERSION} +USER postgres +COPY ./assets/create_records_table.sql /docker-entrypoint-initdb.d/init.sql diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..6074213 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,18 @@ +version: '3.8' +services: + records_postgres: + image: postgres:${POSTGRES_VERSION} + container_name: records_postgres + build: + context: database + dockerfile: dockerfile + args: + POSTGRES_VERSION: ${POSTGRES_VERSION} + environment: + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} + POSTGRES_USER: ${POSTGRES_USER} + POSTGRES_DB: ${POSTGRES_DATABASE} + ports: + - ${POSTGRES_PORT}:5432 + restart: always + diff --git a/producer/README.md b/producer/README.md new file mode 100644 index 0000000..e69de29 diff --git a/producer/pyproject.toml b/producer/pyproject.toml new file mode 100644 index 0000000..85c6656 --- /dev/null +++ b/producer/pyproject.toml @@ -0,0 +1,14 @@ +[tool.poetry] +name = "producer" +version = "0.1.0" +description = "" +authors = ["alexau "] +readme = "README.md" + +[tool.poetry.dependencies] +python = "^3.11" + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/producer/src/__init__.py b/producer/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/producer/tests/__init__.py b/producer/tests/__init__.py new file mode 100644 index 0000000..e69de29 From b92505f42f9d64ba083ec31c83fc10ec2127aadc Mon Sep 17 00:00:00 2001 From: alexau Date: Sat, 2 Dec 2023 00:18:42 +0800 Subject: [PATCH 02/36] Small Commit --- .env | 12 +- .pre-commit-config.yaml | 61 ++++++++++ consumer/pyproject.toml | 2 +- database/assets/create_records_table.sql | 2 +- database/dockerfile | 4 +- docker-compose.yml | 35 +++++- producer/dockerfile.prod | 35 ++++++ producer/poetry.lock | 22 ++++ producer/pyproject.toml | 3 +- producer/src/adapters/__init__.py | 0 .../src/adapters/upsert_filenames/__init__.py | 0 .../src/adapters/upsert_filenames/rabbitmq.py | 114 ++++++++++++++++++ producer/src/deployments/__init__.py | 0 producer/src/deployments/script/__init__.py | 0 producer/src/deployments/script/config.py | 21 ++++ producer/src/deployments/script/main.py | 36 ++++++ .../src/deployments/script/setup_logging.py | 53 ++++++++ producer/src/usecases/__init__.py | 1 + producer/src/usecases/upsert_filenames.py | 25 ++++ 19 files changed, 416 insertions(+), 10 deletions(-) create mode 100644 producer/dockerfile.prod create mode 100644 producer/poetry.lock create mode 100644 producer/src/adapters/__init__.py create mode 100644 producer/src/adapters/upsert_filenames/__init__.py create mode 100644 
producer/src/adapters/upsert_filenames/rabbitmq.py create mode 100644 producer/src/deployments/__init__.py create mode 100644 producer/src/deployments/script/__init__.py create mode 100644 producer/src/deployments/script/config.py create mode 100644 producer/src/deployments/script/main.py create mode 100644 producer/src/deployments/script/setup_logging.py create mode 100644 producer/src/usecases/__init__.py create mode 100644 producer/src/usecases/upsert_filenames.py diff --git a/.env b/.env index cb60bc0..15594a8 100644 --- a/.env +++ b/.env @@ -1,5 +1,13 @@ -POSTGRES_VERSION=15.3-alpine3.17 +POSTGRES_VERSION_TAG=15.3-alpine3.17 POSTGRES_PORT=5432 POSTGRES_USER=postgres POSTGRES_PASSWORD=postgres -POSTGRES_DATABASE=records \ No newline at end of file +POSTGRES_DATABASE=records + +RABBITMQ_VERSION_TAG=3.12.10-management +RABBITMQ_USER=rabbitmq +RABBITMQ_PASSWORD=rabbitmq +RABBITMQ_PORT=5672 +RABBITMQ_WEBAPP_PORT=15672 + +QUEUE_NAME=filenames \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fcec73e..61b7963 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,6 +16,7 @@ repos: rev: 23.10.1 hooks: - id: black + - repo: - repo: local hooks: - id: mypy-producer @@ -31,4 +32,64 @@ repos: language: system types: [python] pass_filenames: false + always_run: true + - id: poetry-check + name: poetry check producer + args: ["-C", "./producer"] + - id: poetry-lock + name: poetry lock producer + args: ["-C", "./producer"] + - id: poetry-export + name: poetry export producer dev dependencies + args: [ + "-C", + "./producer", + "-f", "requirements.txt", + "-o", + "./producer/requirements-dev.txt", + "--without-hashes", + "--with", + "dev" + ] + always_run: true + - id: poetry-export + name: poetry export producer dependencies + args: [ + "-C", + "./producer", + "-f", "requirements.txt", + "-o", + "./producer/requirements.txt", + "--without-hashes" + ] + always_run: true + - id: poetry-check + name: poetry check consumer + args: ["-C", "./consumer"] + - id: poetry-lock + name: poetry lock consumer + args: ["-C", "./consumer"] + - id: poetry-export + name: poetry export consumer dev dependencies + args: [ + "-C", + "./consumer", + "-f", "requirements.txt", + "-o", + "./consumer/requirements-dev.txt", + "--without-hashes", + "--with", + "dev" + ] + always_run: true + - id: poetry-export + name: poetry export consumer dependencies + args: [ + "-C", + "./consumer", + "-f", "requirements.txt", + "-o", + "./consumer/requirements.txt", + "--without-hashes" + ] always_run: true \ No newline at end of file diff --git a/consumer/pyproject.toml b/consumer/pyproject.toml index c5e47ea..1045eb4 100644 --- a/consumer/pyproject.toml +++ b/consumer/pyproject.toml @@ -6,7 +6,7 @@ authors = ["alexau "] readme = "README.md" [tool.poetry.dependencies] -python = "^3.11" +python = "^3.12" [build-system] diff --git a/database/assets/create_records_table.sql b/database/assets/create_records_table.sql index e96c176..901480f 100644 --- a/database/assets/create_records_table.sql +++ b/database/assets/create_records_table.sql @@ -6,4 +6,4 @@ CREATE TABLE IF NOT EXISTS records ( ); CREATE INDEX IF NOT EXISTS idx_records_record_time ON records USING BTREE (record_time); -CREATE INDEX IF NOT EXISTS idx_records_record_time ON records USING HASH (sensor_id); \ No newline at end of file +CREATE INDEX IF NOT EXISTS idx_records_sensor_id ON records USING BTREE (sensor_id); \ No newline at end of file diff --git a/database/dockerfile b/database/dockerfile index 
3eeee05..021ced9 100644 --- a/database/dockerfile +++ b/database/dockerfile @@ -1,4 +1,4 @@ -ARG POSTGRES_VERSION -FROM docker.io/postgres:${POSTGRES_VERSION} +ARG POSTGRES_VERSION_TAG +FROM docker.io/postgres:${POSTGRES_VERSION_TAG} USER postgres COPY ./assets/create_records_table.sql /docker-entrypoint-initdb.d/init.sql diff --git a/docker-compose.yml b/docker-compose.yml index 6074213..b2e41d1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,13 +1,13 @@ version: '3.8' services: records_postgres: - image: postgres:${POSTGRES_VERSION} + image: records_postgres:${POSTGRES_VERSION_TAG} container_name: records_postgres build: context: database dockerfile: dockerfile args: - POSTGRES_VERSION: ${POSTGRES_VERSION} + POSTGRES_VERSION_TAG: ${POSTGRES_VERSION_TAG} environment: POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} POSTGRES_USER: ${POSTGRES_USER} @@ -15,4 +15,33 @@ services: ports: - ${POSTGRES_PORT}:5432 restart: always - + records_rabbitmq: + image: rabbitmq:${RABBITMQ_VERSION_TAG} + container_name: records_rabbitmq + environment: + RABBITMQ_DEFAULT_USER: ${RABBITMQ_USER} + RABBITMQ_DEFAULT_PASS: ${RABBITMQ_PASSWORD} + ports: + - ${RABBITMQ_WEBAPP_PORT}:15672 + - ${RABBITMQ_PORT}:5672 + restart: always + records_producer: + image: records_producer:latest + container_name: records_producer + build: + context: producer + dockerfile: dockerfile.prod + args: + AMAZON_LINUX_VERSION_TAG: ${AMAZON_LINUX_VERSION_TAG} + environment: + TARGET_FILE_DIR: ${TARGET_FILE_DIR} + TARGET_FILE_EXTENSION: ${TARGET_FILE_EXTENSION} + LOG_LEVEL: ${LOG_LEVEL} + LOG_FORMAT: ${LOG_FORMAT} + LOG_DATE_FORMAT: ${LOG_DATE_FORMAT} + RABBITMQ_HOST: records_rabbitmq + RABBITMQ_PORT: ${RABBITMQ_PORT} + RABBITMQ_USER: ${RABBITMQ_USER} + RABBITMQ_PASSWORD: ${RABBITMQ_PASSWORD} + RABBITMQ_QUEUE: ${QUEUE_NAME} + diff --git a/producer/dockerfile.prod b/producer/dockerfile.prod new file mode 100644 index 0000000..08f7dae --- /dev/null +++ b/producer/dockerfile.prod @@ -0,0 +1,35 @@ +ARG AMAZON_LINUX_VERSION_TAG +FROM amazonlinux:${AMAZON_LINUX_VERSION_TAG} as build +RUN yum update -y && \ + yum install -y \ + python3.12 \ + python3-pip \ + python3-devel \ + shadow-utils && \ + yum clean all + +RUN adduser app + +USER app +ENV HOME=/home/app +WORKDIR ${HOME} + +COPY requirements.txt . +RUN pip3 install --user -r requirements.txt + +FROM amazonlinux:${AMAZON_LINUX_VERSION_TAG} as runtime +RUN yum update -y && \ + yum install -y \ + python3.12 \ + shadow-utils && \ + yum clean all +RUN adduser app + +USER app +ENV HOME=/home/app +WORKDIR ${HOME} + +COPY --from=build ${HOME}/.local ${HOME}/.local + +COPY src/ . +CMD python3.12 -m deployments.script.main \ No newline at end of file diff --git a/producer/poetry.lock b/producer/poetry.lock new file mode 100644 index 0000000..c798a25 --- /dev/null +++ b/producer/poetry.lock @@ -0,0 +1,22 @@ +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. 
+ +[[package]] +name = "pika" +version = "1.3.2" +description = "Pika Python AMQP Client Library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pika-1.3.2-py3-none-any.whl", hash = "sha256:0779a7c1fafd805672796085560d290213a465e4f6f76a6fb19e378d8041a14f"}, + {file = "pika-1.3.2.tar.gz", hash = "sha256:b2a327ddddf8570b4965b3576ac77091b850262d34ce8c1d8cb4e4146aa4145f"}, +] + +[package.extras] +gevent = ["gevent"] +tornado = ["tornado"] +twisted = ["twisted"] + +[metadata] +lock-version = "2.0" +python-versions = "^3.12" +content-hash = "8c34279cc0de5600b684c04804c053018d625919406026acf7c64f821929205e" diff --git a/producer/pyproject.toml b/producer/pyproject.toml index 85c6656..1c97623 100644 --- a/producer/pyproject.toml +++ b/producer/pyproject.toml @@ -6,7 +6,8 @@ authors = ["alexau "] readme = "README.md" [tool.poetry.dependencies] -python = "^3.11" +python = "^3.12" +pika = "^1.3.2" [build-system] diff --git a/producer/src/adapters/__init__.py b/producer/src/adapters/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/producer/src/adapters/upsert_filenames/__init__.py b/producer/src/adapters/upsert_filenames/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/producer/src/adapters/upsert_filenames/rabbitmq.py b/producer/src/adapters/upsert_filenames/rabbitmq.py new file mode 100644 index 0000000..a75389c --- /dev/null +++ b/producer/src/adapters/upsert_filenames/rabbitmq.py @@ -0,0 +1,114 @@ +from contextlib import contextmanager +from usecases import UpsertFilenamesClient +import pika +from pika.channel import Channel +from pika.connection import Connection +from typing import Iterator, Optional, override, overload +import logging + +class RabbitMQUpsertFilenamesClient(UpsertFilenamesClient): + def __init__( + self, + host: str, + port: int, + username: str, + password: str, + queue: str = 'filenames', + ) -> None: + credentials = pika.PlainCredentials(username, password) + self._conn_parameters = pika.ConnectionParameters( + host=host, + port=port, + credentials=credentials, + ) + self._queue = queue + self._conn: Optional[Connection] = None + + @overload + def upsert(self, filename: str) -> bool: + ... + + @overload + def upsert(self, filename: list[str]) -> bool: + ... 
+ + @override + def upsert(self, filename: str | list[str]) -> bool | list[bool]: + if isinstance(filename, str): + return self._upsert_single(filename) + return self._upsert_batch(filename) + + @override + def upsert_stream(self, filename_iterator: Iterator[str]) -> dict[str, bool]: + successes_map: dict[str, bool] = {} + try: + for filename in filename_iterator: + success = self._upsert_single(filename) + successes_map[filename] = success + except Exception as e: + logging.exception(e) + return successes_map + + @contextmanager + def _get_amqp_conn(self) -> Iterator[pika.BaseConnection]: + if self._conn is None or self._conn.is_closed: + self._conn = pika.BlockingConnection(self._conn_parameters) + yield self._conn + + def _publish_one(self, channel: Channel, filename: str) -> None: + channel.basic_publish( + exchange='', + routing_key=self._queue, + body=filename, + properties=pika.BasicProperties( + delivery_mode=pika.DeliveryMode.Persistent + ), + ) + + def _upsert_single(self, filename: str) -> bool: + try: + with self._get_amqp_conn() as connection: + channel = connection.channel() + channel.queue_declare( + queue=self._queue, + durable=True, + ) + channel.confirm_delivery() + self._publish_one(channel, filename) + return True + except Exception as e: + logging.exception(e) + return False + + def _upsert_batch(self, filenames: list[str]) -> list[bool]: + successes = [] + try: + with self._get_amqp_conn() as connection: + channel = connection.channel() + channel.queue_declare( + queue=self._queue, + durable=True, + ) + for filename in filenames: + try: + self._publish_one(channel, filename) + successes.append(True) + except Exception as e: + logging.exception(e) + successes.append(False) + except Exception as e: + logging.exception(e) + return [False] * len(filenames) + return successes + + @override + def close(self) -> bool: + try: + if self._conn is not None: + self._conn.close() + return True + return False + except Exception as e: + logging.exception(e) + return False + \ No newline at end of file diff --git a/producer/src/deployments/__init__.py b/producer/src/deployments/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/producer/src/deployments/script/__init__.py b/producer/src/deployments/script/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/producer/src/deployments/script/config.py b/producer/src/deployments/script/config.py new file mode 100644 index 0000000..76fd068 --- /dev/null +++ b/producer/src/deployments/script/config.py @@ -0,0 +1,21 @@ +import os + +class ProjectConfig: + TARGET_FILE_DIR = os.getenv('TARGET_FILE_DIR', '/tmp') + TARGET_FILE_EXTENSION = os.getenv('TARGET_FILE_EXTENSION', '.csv') + +class LoggingConfig: + LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO') + LOG_FORMAT = os.getenv('LOG_FORMAT', '%(asctime)s - %(name)s - %(levelname)s - %(message)s') + LOG_DATE_FORMAT = os.getenv('LOG_DATE_FORMAT', '%Y-%m-%d %H:%M:%S') + LOG_DIR = os.getenv('LOG_DIR', '/tmp') + LOG_RETENTION = os.getenv('LOG_RETENTION', '7') + LOG_ROTATION = os.getenv('LOG_ROTATION', 'midnight') + +class RabbitMQConfig: + HOST = os.getenv('RABBITMQ_HOST', 'localhost') + PORT = int(os.getenv('RABBITMQ_PORT', 5672)) + USERNAME = os.getenv('RABBITMQ_USERNAME', 'guest') + PASSWORD = os.getenv('RABBITMQ_PASSWORD', 'guest') + QUEUE = os.getenv('RABBITMQ_QUEUE', 'filenames') + \ No newline at end of file diff --git a/producer/src/deployments/script/main.py b/producer/src/deployments/script/main.py new file mode 100644 index 0000000..16fcf1d --- /dev/null +++ 
b/producer/src/deployments/script/main.py @@ -0,0 +1,36 @@ +import pathlib +from typing import Iterator +from adapters.upsert_filenames.rabbitmq import RabbitMQUpsertFilenamesClient +from .config import RabbitMQConfig, ProjectConfig +from .setup_logging import setup_logging +import logging + +setup_logging() + +upsert_filenames_client = RabbitMQUpsertFilenamesClient( + host=RabbitMQConfig.HOST, + port=RabbitMQConfig.PORT, + username=RabbitMQConfig.USERNAME, + password=RabbitMQConfig.PASSWORD, + queue=RabbitMQConfig.QUEUE, +) + +def traverse_files() -> Iterator[str]: + for filename in pathlib.Path(ProjectConfig.TARGET_FILE_DIR).glob(f'*{ProjectConfig.TARGET_FILE_EXTENSION}'): + yield filename + +def main() -> None: + try: + successes_map = upsert_filenames_client.upsert_stream(traverse_files()) + failed_filenames = [filename for filename, success in successes_map.items() if not success] + if failed_filenames: + raise Exception(f'Failed to upsert filenames: {failed_filenames}') + except Exception as e: + logging.exception(e) + raise e + finally: + upsert_filenames_client.close() + +if __name__ == '__main__': + main() + \ No newline at end of file diff --git a/producer/src/deployments/script/setup_logging.py b/producer/src/deployments/script/setup_logging.py new file mode 100644 index 0000000..55fc1d2 --- /dev/null +++ b/producer/src/deployments/script/setup_logging.py @@ -0,0 +1,53 @@ +import logging +from logging.handlers import TimedRotatingFileHandler +from config import LoggingConfig +import pathlib + +def setup_logging() -> None: + LOG_LEVEL_INT = getattr(logging, LoggingConfig.LOG_LEVEL.upper(), None) + + pathlib.Path(LoggingConfig.LOG_DIR).mkdir(parents=True, exist_ok=True) + + handlers = [] + + stream_handler = logging.StreamHandler() + stream_handler.setFormatter(logging.Formatter(LoggingConfig.LOG_FORMAT, datefmt = LoggingConfig.LOG_DATE_FORMAT)) + stream_handler.setLevel(LoggingConfig.LOG_LEVEL) + handlers.append(stream_handler) + + if LOG_LEVEL_INT is not None and LOG_LEVEL_INT <= logging.INFO: + info_handler = TimedRotatingFileHandler( + filename=f'{LoggingConfig.LOG_DIR}/info.log', + when=LoggingConfig.LOG_ROTATION, + interval=1, + backupCount=LoggingConfig.LOG_RETENTION, + ) + info_handler.setFormatter(logging.Formatter(LoggingConfig.LOG_FORMAT, datefmt = LoggingConfig.LOG_DATE_FORMAT)) + info_handler.setLevel(logging.INFO) + handlers.append(info_handler) + + if LOG_LEVEL_INT is not None and LOG_LEVEL_INT <= logging.WARNING: + warning_handler = TimedRotatingFileHandler( + filename=f'{LoggingConfig.LOG_DIR}/warning.log', + when=LoggingConfig.LOG_ROTATION, + interval=1, + backupCount=LoggingConfig.LOG_RETENTION, + ) + warning_handler.setFormatter(logging.Formatter(LoggingConfig.LOG_FORMAT, datefmt = LoggingConfig.LOG_DATE_FORMAT)) + warning_handler.setLevel(logging.WARNING) + handlers.append(warning_handler) + + if LOG_LEVEL_INT is not None and LOG_LEVEL_INT <= logging.ERROR: + error_handler = TimedRotatingFileHandler( + filename=f'{LoggingConfig.LOG_DIR}/error.log', + when=LoggingConfig.LOG_ROTATION, + interval=1, + backupCount=LoggingConfig.LOG_RETENTION, + ) + error_handler.setFormatter(logging.Formatter(LoggingConfig.LOG_FORMAT, datefmt = LoggingConfig.LOG_DATE_FORMAT)) + error_handler.setLevel(logging.ERROR) + handlers.append(error_handler) + + root_logger = logging.getLogger() + root_logger.setLevel(LoggingConfig.LOG_LEVEL) + root_logger.handlers = handlers \ No newline at end of file diff --git a/producer/src/usecases/__init__.py b/producer/src/usecases/__init__.py new 
file mode 100644 index 0000000..9596dae --- /dev/null +++ b/producer/src/usecases/__init__.py @@ -0,0 +1 @@ +from .upsert_filenames import UpsertFilenamesClient \ No newline at end of file diff --git a/producer/src/usecases/upsert_filenames.py b/producer/src/usecases/upsert_filenames.py new file mode 100644 index 0000000..107f9cf --- /dev/null +++ b/producer/src/usecases/upsert_filenames.py @@ -0,0 +1,25 @@ +from abc import ABC, abstractmethod +from typing import Iterator, overload + +class UpsertFilenamesClient(ABC): + + @overload + def upsert(self, filename: str) -> bool: + ... + + @overload + def upsert(self, filename: list[str]) -> list[bool]: + ... + + @abstractmethod + def upsert(self, filename: str | list[str]) -> bool | list[bool]: + ... + + @abstractmethod + def upsert_stream(self, filename_iterator: Iterator[str]) -> dict[str, bool]: + ... + + @abstractmethod + def close(self) -> bool: + ... + \ No newline at end of file From 259b4f43bbcb74e3e975cf359468e8c8b4d4619d Mon Sep 17 00:00:00 2001 From: alexau Date: Sat, 2 Dec 2023 12:57:35 +0800 Subject: [PATCH 03/36] Adding tests for producer and consumer's code --- .env | 25 +- .github/workflows/test.yml | 69 ++++ .pre-commit-config.yaml | 23 +- Makefile | 7 +- .../dockerfile.prod => consumer/dockerfile | 7 +- consumer/poetry.lock | 314 ++++++++++++++++++ consumer/pyproject.toml | 10 +- consumer/requirements-dev.txt | 12 + consumer/requirements.txt | 3 + consumer/reuqirements-dev.txt | 2 + .../src/adapters}/__init__.py | 0 .../src/adapters/fetch_filenames/__init__.py | 0 .../src/adapters/fetch_filenames/rabbitmq.py | 88 +++++ .../file_parse_iot_records/__init__.py | 0 .../adapters/file_parse_iot_records/csv.py | 97 ++++++ .../adapters/upsert_iot_records/__init__.py | 0 .../adapters/upsert_iot_records/postgres.py | 138 ++++++++ consumer/src/deployments/__init__.py | 0 consumer/src/deployments/scripts/__init__.py | 0 consumer/src/deployments/scripts/config.py | 37 +++ consumer/src/deployments/scripts/main.py | 59 ++++ .../src/deployments/scripts/setup_logging.py | 70 ++++ consumer/src/entities/__init__.py | 1 + consumer/src/entities/iot_record.py | 10 + consumer/src/usecases/__init__.py | 3 + consumer/src/usecases/fetch_filenames.py | 12 + .../src/usecases/file_parse_iot_records.py | 27 ++ consumer/src/usecases/upsert_iot_records.py | 21 ++ docker-compose.yml | 41 ++- producer/dockerfile | 36 ++ producer/poetry.lock | 204 +++++++++++- producer/pyproject.toml | 7 +- producer/requirements-dev.txt | 10 + producer/requirements.txt | 1 + .../adapters/publish_filenames/__init__.py | 0 .../rabbitmq.py | 89 +++-- producer/src/deployments/script/main.py | 33 +- .../src/deployments/script/setup_logging.py | 37 ++- producer/src/usecases/__init__.py | 2 +- producer/src/usecases/publish_filenames.py | 20 ++ producer/src/usecases/upsert_filenames.py | 25 -- producer/tests/test_adapters/__init__.py | 0 .../test_publish_filenames/__init__.py | 0 .../test_publish_filenames/conftest.py | 13 + .../test_publish_filenames/test_rabbitmq.py | 85 +++++ 45 files changed, 1511 insertions(+), 127 deletions(-) create mode 100644 .github/workflows/test.yml rename producer/dockerfile.prod => consumer/dockerfile (86%) create mode 100644 consumer/poetry.lock create mode 100644 consumer/requirements-dev.txt create mode 100644 consumer/requirements.txt create mode 100644 consumer/reuqirements-dev.txt rename {producer/src/adapters/upsert_filenames => consumer/src/adapters}/__init__.py (100%) create mode 100644 consumer/src/adapters/fetch_filenames/__init__.py create 
mode 100644 consumer/src/adapters/fetch_filenames/rabbitmq.py create mode 100644 consumer/src/adapters/file_parse_iot_records/__init__.py create mode 100644 consumer/src/adapters/file_parse_iot_records/csv.py create mode 100644 consumer/src/adapters/upsert_iot_records/__init__.py create mode 100644 consumer/src/adapters/upsert_iot_records/postgres.py create mode 100644 consumer/src/deployments/__init__.py create mode 100644 consumer/src/deployments/scripts/__init__.py create mode 100644 consumer/src/deployments/scripts/config.py create mode 100644 consumer/src/deployments/scripts/main.py create mode 100644 consumer/src/deployments/scripts/setup_logging.py create mode 100644 consumer/src/entities/__init__.py create mode 100644 consumer/src/entities/iot_record.py create mode 100644 consumer/src/usecases/__init__.py create mode 100644 consumer/src/usecases/fetch_filenames.py create mode 100644 consumer/src/usecases/file_parse_iot_records.py create mode 100644 consumer/src/usecases/upsert_iot_records.py create mode 100644 producer/dockerfile create mode 100644 producer/requirements-dev.txt create mode 100644 producer/requirements.txt create mode 100644 producer/src/adapters/publish_filenames/__init__.py rename producer/src/adapters/{upsert_filenames => publish_filenames}/rabbitmq.py (52%) create mode 100644 producer/src/usecases/publish_filenames.py delete mode 100644 producer/src/usecases/upsert_filenames.py create mode 100644 producer/tests/test_adapters/__init__.py create mode 100644 producer/tests/test_adapters/test_publish_filenames/__init__.py create mode 100644 producer/tests/test_adapters/test_publish_filenames/conftest.py create mode 100644 producer/tests/test_adapters/test_publish_filenames/test_rabbitmq.py diff --git a/.env b/.env index 15594a8..a97e897 100644 --- a/.env +++ b/.env @@ -3,6 +3,7 @@ POSTGRES_PORT=5432 POSTGRES_USER=postgres POSTGRES_PASSWORD=postgres POSTGRES_DATABASE=records +POSTGRES_BATCH_UPSERT_SIZE=1000 RABBITMQ_VERSION_TAG=3.12.10-management RABBITMQ_USER=rabbitmq @@ -10,4 +11,26 @@ RABBITMQ_PASSWORD=rabbitmq RABBITMQ_PORT=5672 RABBITMQ_WEBAPP_PORT=15672 -QUEUE_NAME=filenames \ No newline at end of file +QUEUE_NAME=filenames + +AMAZON_LINUX_VERSION_TAG=2023.2.20231113.0 + +TARGET_FILE_DIR=./records +TARGET_FILE_EXTENSION=.csv + +PRODUCER_LOG_LEVEL=INFO +PRODUCER_LOG_FORMAT="[%(asctime)s | %(levelname)s | %(name)s] {%(filename)s:%(lineno)d} >> %(message)s" +PRODUCER_LOG_DATE_FORMAT="%Y-%m-%d %H:%M:%S" +PRODUCER_LOG_DIR=./logs/producer +PRODUCER_LOG_RETENTION=7 +PRODUCER_LOG_ROTATION=midnight + +CONSUMER_LOG_LEVEL=INFO +CONSUMER_LOG_FORMAT="[%(asctime)s | %(levelname)s | %(name)s] {%(filename)s:%(lineno)d} >> %(message)s" +CONSUMER_LOG_DATE_FORMAT="%Y-%m-%d %H:%M:%S" +CONSUMER_LOG_DIR=./logs/producer +CONSUMER_LOG_RETENTION=7 +CONSUMER_LOG_ROTATION=midnight + +CSV_PARSER_RECOGNIZED_DATETIME_FORMATS="%Y-%m-%dT%H:%M:%S.%fZ" +CSV_PARSER_DELIMITER="," diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..6fe79d9 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,69 @@ +name: Producer Consumer CI Test +on: + push: + branches: ["dev"] + paths: + - 'env/test/**' + - 'modules/**' + workflow_dispatch: +jobs: + load-dotenv: + runs-on: ubuntu-latest + outputs: + postgres-version-tag: ${{ steps.load-dotenv.outputs.postgres-version-tag }} + postgres-port: ${{ steps.load-dotenv.outputs.postgres-port }} + postgres-user: ${{ steps.load-dotenv.outputs.postgres-user }} + postgres-password: ${{ 
steps.load-dotenv.outputs.postgres-password }} + postgres-database: ${{ steps.load-dotenv.outputs.postgres-database }} + rabbitmq-version-tag: ${{ steps.load-dotenv.outputs.rabbitmq-version-tag }} + rabbitmq-port: ${{ steps.load-dotenv.outputs.rabbitmq-port }} + rabbitmq-user: ${{ steps.load-dotenv.outputs.rabbitmq-user }} + rabbitmq-password: ${{ steps.load-dotenv.outputs.rabbitmq-password }} + queue-name: ${{ steps.load-dotenv.outputs.queue-name }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Load dotenv + id: load-dotenv + run: | + set -o allexport + source .env + set +o allexport + echo "POSTGRES_VERSION_TAG=$POSTGRES_VERSION_TAG" >> $GITHUB_OUTPUT + echo "POSTGRES_PORT=$POSTGRES_PORT" >> $GITHUB_OUTPUT + echo "POSTGRES_USER=$POSTGRES_USER" >> $GITHUB_OUTPUT + echo "POSTGRES_PASSWORD=$POSTGRES_PASSWORD" >> $GITHUB_OUTPUT + echo "POSTGRES_DATABASE=$POSTGRES_DATABASE" >> $GITHUB_OUTPUT + echo "RABBITMQ_VERSION_TAG=$RABBITMQ_VERSION_TAG" >> $GITHUB_OUTPUT + echo "RABBITMQ_PORT=$RABBITMQ_PORT" >> $GITHUB_OUTPUT + echo "RABBITMQ_USER=$RABBITMQ_USER" >> $GITHUB_OUTPUT + echo "RABBITMQ_PASSWORD=$RABBITMQ_PASSWORD" >> $GITHUB_OUTPUT + echo "QUEUE_NAME=$QUEUE_NAME" >> $GITHUB_OUTPUT + test-producer: + needs: load-dotenv + runs-on: ubuntu-latest + services: + rabbitmq: + image: rabbitmq:${{ needs.load-dotenv.outputs.rabbitmq-version-tag }} + env: + RABBITMQ_DEFAULT_USER: ${{ needs.load-dotenv.outputs.rabbitmq-user }} + RABBITMQ_DEFAULT_PASS: ${{ needs.load-dotenv.outputs.rabbitmq-password }} + options: >- + --health-cmd "rabbitmq-diagnostics -q check_running" + --health-interval 5s + --health-timeout 30s + --health-retries 3 + steps: + - name: Checkout + uses: actions/checkout@v4 + - uses: actions/setup-python@v4 + with: + python-version: '3.11' + cache: 'pip' + cache-dependency-path: requirements-dev.txt + - name: Install dependencies + run: | + pip install -r requirements-dev.txt + - name: Run tests + run: | + coverage run -m pytest -v diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 61b7963..cb45f56 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,28 +11,13 @@ repos: - id: detect-aws-credentials args: [--allow-missing-credentials] - id: detect-private-key - - id: name-tests-test - repo: https://github.com/psf/black - rev: 23.10.1 + rev: 23.11.0 hooks: - id: black - - repo: - - repo: local + - repo: https://github.com/python-poetry/poetry + rev: 1.7.0 hooks: - - id: mypy-producer - name: mypy-producer - entry: mypy producer/src - language: system - types: [python] - pass_filenames: false - always_run: true - - id: mypy-consumer - name: mypy-consumer - entry: mypy consumer/src - language: system - types: [python] - pass_filenames: false - always_run: true - id: poetry-check name: poetry check producer args: ["-C", "./producer"] @@ -92,4 +77,4 @@ repos: "./consumer/requirements.txt", "--without-hashes" ] - always_run: true \ No newline at end of file + always_run: true diff --git a/Makefile b/Makefile index d59c083..4346ca3 100644 --- a/Makefile +++ b/Makefile @@ -7,4 +7,9 @@ up_d: down: docker compose down stats: - docker stats --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.MemPerc}}\t{{.NetIO}}\t{{.BlockIO}}\t{{.PIDs}}" \ No newline at end of file + docker stats --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.MemPerc}}\t{{.NetIO}}\t{{.BlockIO}}\t{{.PIDs}}" +export_requirements: + cd producer && \ + poetry export -f requirements.txt --output requirements.txt --without-hashes && \ + cd ../consumer && \ + 
poetry export -f requirements.txt --output requirements.txt --without-hashes diff --git a/producer/dockerfile.prod b/consumer/dockerfile similarity index 86% rename from producer/dockerfile.prod rename to consumer/dockerfile index 08f7dae..b343433 100644 --- a/producer/dockerfile.prod +++ b/consumer/dockerfile @@ -2,7 +2,7 @@ ARG AMAZON_LINUX_VERSION_TAG FROM amazonlinux:${AMAZON_LINUX_VERSION_TAG} as build RUN yum update -y && \ yum install -y \ - python3.12 \ + python3.11 \ python3-pip \ python3-devel \ shadow-utils && \ @@ -20,7 +20,8 @@ RUN pip3 install --user -r requirements.txt FROM amazonlinux:${AMAZON_LINUX_VERSION_TAG} as runtime RUN yum update -y && \ yum install -y \ - python3.12 \ + python3.11 \ + python3-pip \ shadow-utils && \ yum clean all RUN adduser app @@ -32,4 +33,4 @@ WORKDIR ${HOME} COPY --from=build ${HOME}/.local ${HOME}/.local COPY src/ . -CMD python3.12 -m deployments.script.main \ No newline at end of file +CMD python3.11 -m deployments.script.main diff --git a/consumer/poetry.lock b/consumer/poetry.lock new file mode 100644 index 0000000..04b864c --- /dev/null +++ b/consumer/poetry.lock @@ -0,0 +1,314 @@ +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "coverage" +version = "7.3.2" +description = "Code coverage measurement for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "coverage-7.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d872145f3a3231a5f20fd48500274d7df222e291d90baa2026cc5152b7ce86bf"}, + {file = "coverage-7.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:310b3bb9c91ea66d59c53fa4989f57d2436e08f18fb2f421a1b0b6b8cc7fffda"}, + {file = "coverage-7.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f47d39359e2c3779c5331fc740cf4bce6d9d680a7b4b4ead97056a0ae07cb49a"}, + {file = "coverage-7.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aa72dbaf2c2068404b9870d93436e6d23addd8bbe9295f49cbca83f6e278179c"}, + {file = "coverage-7.3.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:beaa5c1b4777f03fc63dfd2a6bd820f73f036bfb10e925fce067b00a340d0f3f"}, + {file = "coverage-7.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:dbc1b46b92186cc8074fee9d9fbb97a9dd06c6cbbef391c2f59d80eabdf0faa6"}, + {file = "coverage-7.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:315a989e861031334d7bee1f9113c8770472db2ac484e5b8c3173428360a9148"}, + {file = "coverage-7.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d1bc430677773397f64a5c88cb522ea43175ff16f8bfcc89d467d974cb2274f9"}, + {file = "coverage-7.3.2-cp310-cp310-win32.whl", hash = "sha256:a889ae02f43aa45032afe364c8ae84ad3c54828c2faa44f3bfcafecb5c96b02f"}, + {file = "coverage-7.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:c0ba320de3fb8c6ec16e0be17ee1d3d69adcda99406c43c0409cb5c41788a611"}, + {file = "coverage-7.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:ac8c802fa29843a72d32ec56d0ca792ad15a302b28ca6203389afe21f8fa062c"}, + {file = "coverage-7.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:89a937174104339e3a3ffcf9f446c00e3a806c28b1841c63edb2b369310fd074"}, + {file = "coverage-7.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e267e9e2b574a176ddb983399dec325a80dbe161f1a32715c780b5d14b5f583a"}, + {file = "coverage-7.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2443cbda35df0d35dcfb9bf8f3c02c57c1d6111169e3c85fc1fcc05e0c9f39a3"}, + {file = "coverage-7.3.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4175e10cc8dda0265653e8714b3174430b07c1dca8957f4966cbd6c2b1b8065a"}, + {file = "coverage-7.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0cbf38419fb1a347aaf63481c00f0bdc86889d9fbf3f25109cf96c26b403fda1"}, + {file = "coverage-7.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:5c913b556a116b8d5f6ef834038ba983834d887d82187c8f73dec21049abd65c"}, + {file = "coverage-7.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1981f785239e4e39e6444c63a98da3a1db8e971cb9ceb50a945ba6296b43f312"}, + {file = "coverage-7.3.2-cp311-cp311-win32.whl", hash = "sha256:43668cabd5ca8258f5954f27a3aaf78757e6acf13c17604d89648ecc0cc66640"}, + {file = "coverage-7.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10c39c0452bf6e694511c901426d6b5ac005acc0f78ff265dbe36bf81f808a2"}, + {file = "coverage-7.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:4cbae1051ab791debecc4a5dcc4a1ff45fc27b91b9aee165c8a27514dd160836"}, + {file = "coverage-7.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12d15ab5833a997716d76f2ac1e4b4d536814fc213c85ca72756c19e5a6b3d63"}, + {file = "coverage-7.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c7bba973ebee5e56fe9251300c00f1579652587a9f4a5ed8404b15a0471f216"}, + {file = "coverage-7.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fe494faa90ce6381770746077243231e0b83ff3f17069d748f645617cefe19d4"}, + {file = "coverage-7.3.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6e9589bd04d0461a417562649522575d8752904d35c12907d8c9dfeba588faf"}, + {file = "coverage-7.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d51ac2a26f71da1b57f2dc81d0e108b6ab177e7d30e774db90675467c847bbdf"}, + {file = "coverage-7.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:99b89d9f76070237975b315b3d5f4d6956ae354a4c92ac2388a5695516e47c84"}, + {file = "coverage-7.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fa28e909776dc69efb6ed975a63691bc8172b64ff357e663a1bb06ff3c9b589a"}, + {file = "coverage-7.3.2-cp312-cp312-win32.whl", hash = "sha256:289fe43bf45a575e3ab10b26d7b6f2ddb9ee2dba447499f5401cfb5ecb8196bb"}, + {file = "coverage-7.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:7dbc3ed60e8659bc59b6b304b43ff9c3ed858da2839c78b804973f613d3e92ed"}, + {file = "coverage-7.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f94b734214ea6a36fe16e96a70d941af80ff3bfd716c141300d95ebc85339738"}, + {file = "coverage-7.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:af3d828d2c1cbae52d34bdbb22fcd94d1ce715d95f1a012354a75e5913f1bda2"}, + {file = "coverage-7.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:630b13e3036e13c7adc480ca42fa7afc2a5d938081d28e20903cf7fd687872e2"}, + {file = "coverage-7.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c9eacf273e885b02a0273bb3a2170f30e2d53a6d53b72dbe02d6701b5296101c"}, + {file = "coverage-7.3.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8f17966e861ff97305e0801134e69db33b143bbfb36436efb9cfff6ec7b2fd9"}, + {file = "coverage-7.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b4275802d16882cf9c8b3d057a0839acb07ee9379fa2749eca54efbce1535b82"}, + {file = "coverage-7.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:72c0cfa5250f483181e677ebc97133ea1ab3eb68645e494775deb6a7f6f83901"}, + {file = "coverage-7.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:cb536f0dcd14149425996821a168f6e269d7dcd2c273a8bff8201e79f5104e76"}, + {file = "coverage-7.3.2-cp38-cp38-win32.whl", hash = "sha256:307adb8bd3abe389a471e649038a71b4eb13bfd6b7dd9a129fa856f5c695cf92"}, + {file = "coverage-7.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:88ed2c30a49ea81ea3b7f172e0269c182a44c236eb394718f976239892c0a27a"}, + {file = "coverage-7.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b631c92dfe601adf8f5ebc7fc13ced6bb6e9609b19d9a8cd59fa47c4186ad1ce"}, + {file = "coverage-7.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d3d9df4051c4a7d13036524b66ecf7a7537d14c18a384043f30a303b146164e9"}, + {file = "coverage-7.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f7363d3b6a1119ef05015959ca24a9afc0ea8a02c687fe7e2d557705375c01f"}, + {file = "coverage-7.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2f11cc3c967a09d3695d2a6f03fb3e6236622b93be7a4b5dc09166a861be6d25"}, + {file = "coverage-7.3.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:149de1d2401ae4655c436a3dced6dd153f4c3309f599c3d4bd97ab172eaf02d9"}, + {file = "coverage-7.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:3a4006916aa6fee7cd38db3bfc95aa9c54ebb4ffbfc47c677c8bba949ceba0a6"}, + {file = "coverage-7.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9028a3871280110d6e1aa2df1afd5ef003bab5fb1ef421d6dc748ae1c8ef2ebc"}, + {file = "coverage-7.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9f805d62aec8eb92bab5b61c0f07329275b6f41c97d80e847b03eb894f38d083"}, + {file = "coverage-7.3.2-cp39-cp39-win32.whl", hash = "sha256:d1c88ec1a7ff4ebca0219f5b1ef863451d828cccf889c173e1253aa84b1e07ce"}, + {file = "coverage-7.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b4767da59464bb593c07afceaddea61b154136300881844768037fd5e859353f"}, + {file = "coverage-7.3.2-pp38.pp39.pp310-none-any.whl", hash = "sha256:ae97af89f0fbf373400970c0a21eef5aa941ffeed90aee43650b81f7d7f47637"}, + {file = "coverage-7.3.2.tar.gz", hash = "sha256:be32ad29341b0170e795ca590e1c07e81fc061cb5b10c74ce7203491484404ef"}, +] + +[package.extras] +toml = ["tomli"] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "mypy" +version = "1.7.1" +description = "Optional static typing 
for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "mypy-1.7.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:12cce78e329838d70a204293e7b29af9faa3ab14899aec397798a4b41be7f340"}, + {file = "mypy-1.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1484b8fa2c10adf4474f016e09d7a159602f3239075c7bf9f1627f5acf40ad49"}, + {file = "mypy-1.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31902408f4bf54108bbfb2e35369877c01c95adc6192958684473658c322c8a5"}, + {file = "mypy-1.7.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f2c2521a8e4d6d769e3234350ba7b65ff5d527137cdcde13ff4d99114b0c8e7d"}, + {file = "mypy-1.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:fcd2572dd4519e8a6642b733cd3a8cfc1ef94bafd0c1ceed9c94fe736cb65b6a"}, + {file = "mypy-1.7.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4b901927f16224d0d143b925ce9a4e6b3a758010673eeded9b748f250cf4e8f7"}, + {file = "mypy-1.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2f7f6985d05a4e3ce8255396df363046c28bea790e40617654e91ed580ca7c51"}, + {file = "mypy-1.7.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:944bdc21ebd620eafefc090cdf83158393ec2b1391578359776c00de00e8907a"}, + {file = "mypy-1.7.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9c7ac372232c928fff0645d85f273a726970c014749b924ce5710d7d89763a28"}, + {file = "mypy-1.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:f6efc9bd72258f89a3816e3a98c09d36f079c223aa345c659622f056b760ab42"}, + {file = "mypy-1.7.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6dbdec441c60699288adf051f51a5d512b0d818526d1dcfff5a41f8cd8b4aaf1"}, + {file = "mypy-1.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4fc3d14ee80cd22367caaaf6e014494415bf440980a3045bf5045b525680ac33"}, + {file = "mypy-1.7.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c6e4464ed5f01dc44dc9821caf67b60a4e5c3b04278286a85c067010653a0eb"}, + {file = "mypy-1.7.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:d9b338c19fa2412f76e17525c1b4f2c687a55b156320acb588df79f2e6fa9fea"}, + {file = "mypy-1.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:204e0d6de5fd2317394a4eff62065614c4892d5a4d1a7ee55b765d7a3d9e3f82"}, + {file = "mypy-1.7.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:84860e06ba363d9c0eeabd45ac0fde4b903ad7aa4f93cd8b648385a888e23200"}, + {file = "mypy-1.7.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8c5091ebd294f7628eb25ea554852a52058ac81472c921150e3a61cdd68f75a7"}, + {file = "mypy-1.7.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40716d1f821b89838589e5b3106ebbc23636ffdef5abc31f7cd0266db936067e"}, + {file = "mypy-1.7.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5cf3f0c5ac72139797953bd50bc6c95ac13075e62dbfcc923571180bebb662e9"}, + {file = "mypy-1.7.1-cp38-cp38-win_amd64.whl", hash = "sha256:78e25b2fd6cbb55ddfb8058417df193f0129cad5f4ee75d1502248e588d9e0d7"}, + {file = "mypy-1.7.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:75c4d2a6effd015786c87774e04331b6da863fc3fc4e8adfc3b40aa55ab516fe"}, + {file = "mypy-1.7.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2643d145af5292ee956aa0a83c2ce1038a3bdb26e033dadeb2f7066fb0c9abce"}, + {file = "mypy-1.7.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75aa828610b67462ffe3057d4d8a4112105ed211596b750b53cbfe182f44777a"}, + {file = "mypy-1.7.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ee5d62d28b854eb61889cde4e1dbc10fbaa5560cb39780c3995f6737f7e82120"}, + {file 
= "mypy-1.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:72cf32ce7dd3562373f78bd751f73c96cfb441de147cc2448a92c1a308bd0ca6"}, + {file = "mypy-1.7.1-py3-none-any.whl", hash = "sha256:f7c5d642db47376a0cc130f0de6d055056e010debdaf0707cd2b0fc7e7ef30ea"}, + {file = "mypy-1.7.1.tar.gz", hash = "sha256:fcb6d9afb1b6208b4c712af0dafdc650f518836065df0d4fb1d800f5d6773db2"}, +] + +[package.dependencies] +mypy-extensions = ">=1.0.0" +typing-extensions = ">=4.1.0" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +install-types = ["pip"] +mypyc = ["setuptools (>=50)"] +reports = ["lxml"] + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." +optional = false +python-versions = ">=3.5" +files = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] + +[[package]] +name = "packaging" +version = "23.2" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, + {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, +] + +[[package]] +name = "pika" +version = "1.3.2" +description = "Pika Python AMQP Client Library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pika-1.3.2-py3-none-any.whl", hash = "sha256:0779a7c1fafd805672796085560d290213a465e4f6f76a6fb19e378d8041a14f"}, + {file = "pika-1.3.2.tar.gz", hash = "sha256:b2a327ddddf8570b4965b3576ac77091b850262d34ce8c1d8cb4e4146aa4145f"}, +] + +[package.extras] +gevent = ["gevent"] +tornado = ["tornado"] +twisted = ["twisted"] + +[[package]] +name = "pluggy" +version = "1.3.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, + {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "psycopg2-binary" +version = "2.9.9" +description = "psycopg2 - Python-PostgreSQL Database Adapter" +optional = false +python-versions = ">=3.7" +files = [ + {file = "psycopg2-binary-2.9.9.tar.gz", hash = "sha256:7f01846810177d829c7692f1f5ada8096762d9172af1b1a28d4ab5b77c923c1c"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c2470da5418b76232f02a2fcd2229537bb2d5a7096674ce61859c3229f2eb202"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c6af2a6d4b7ee9615cbb162b0738f6e1fd1f5c3eda7e5da17861eacf4c717ea7"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:75723c3c0fbbf34350b46a3199eb50638ab22a0228f93fb472ef4d9becc2382b"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:83791a65b51ad6ee6cf0845634859d69a038ea9b03d7b26e703f94c7e93dbcf9"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:0ef4854e82c09e84cc63084a9e4ccd6d9b154f1dbdd283efb92ecd0b5e2b8c84"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed1184ab8f113e8d660ce49a56390ca181f2981066acc27cf637d5c1e10ce46e"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d2997c458c690ec2bc6b0b7ecbafd02b029b7b4283078d3b32a852a7ce3ddd98"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:b58b4710c7f4161b5e9dcbe73bb7c62d65670a87df7bcce9e1faaad43e715245"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:0c009475ee389757e6e34611d75f6e4f05f0cf5ebb76c6037508318e1a1e0d7e"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8dbf6d1bc73f1d04ec1734bae3b4fb0ee3cb2a493d35ede9badbeb901fb40f6f"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-win32.whl", hash = "sha256:3f78fd71c4f43a13d342be74ebbc0666fe1f555b8837eb113cb7416856c79682"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:876801744b0dee379e4e3c38b76fc89f88834bb15bf92ee07d94acd06ec890a0"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ee825e70b1a209475622f7f7b776785bd68f34af6e7a46e2e42f27b659b5bc26"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1ea665f8ce695bcc37a90ee52de7a7980be5161375d42a0b6c6abedbf0d81f0f"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:143072318f793f53819048fdfe30c321890af0c3ec7cb1dfc9cc87aa88241de2"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c332c8d69fb64979ebf76613c66b985414927a40f8defa16cf1bc028b7b0a7b0"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7fc5a5acafb7d6ccca13bfa8c90f8c51f13d8fb87d95656d3950f0158d3ce53"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:977646e05232579d2e7b9c59e21dbe5261f403a88417f6a6512e70d3f8a046be"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b6356793b84728d9d50ead16ab43c187673831e9d4019013f1402c41b1db9b27"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bc7bb56d04601d443f24094e9e31ae6deec9ccb23581f75343feebaf30423359"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:77853062a2c45be16fd6b8d6de2a99278ee1d985a7bd8b103e97e41c034006d2"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:78151aa3ec21dccd5cdef6c74c3e73386dcdfaf19bced944169697d7ac7482fc"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-win32.whl", hash = "sha256:dc4926288b2a3e9fd7b50dc6a1909a13bbdadfc67d93f3374d984e56f885579d"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:b76bedd166805480ab069612119ea636f5ab8f8771e640ae103e05a4aae3e417"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0605eaed3eb239e87df0d5e3c6489daae3f7388d455d0c0b4df899519c6a38d"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e6f98446430fdf41bd36d4faa6cb409f5140c1c2cf58ce0bbdaf16af7d3f119"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c77e3d1862452565875eb31bdb45ac62502feabbd53429fdc39a1cc341d681ba"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win32.whl", hash = "sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2293b001e319ab0d869d660a704942c9e2cce19745262a8aba2115ef41a0a42a"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ef7df18daf2c4c07e2695e8cfd5ee7f748a1d54d802330985a78d2a5a6dca9"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a602ea5aff39bb9fac6308e9c9d82b9a35c2bf288e184a816002c9fae930b77"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8359bf4791968c5a78c56103702000105501adb557f3cf772b2c207284273984"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:275ff571376626195ab95a746e6a04c7df8ea34638b99fc11160de91f2fef503"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:f9b5571d33660d5009a8b3c25dc1db560206e2d2f89d3df1cb32d72c0d117d52"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:420f9bbf47a02616e8554e825208cb947969451978dceb77f95ad09c37791dae"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:4154ad09dac630a0f13f37b583eae260c6aa885d67dfbccb5b02c33f31a6d420"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a148c5d507bb9b4f2030a2025c545fccb0e1ef317393eaba42e7eabd28eb6041"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:68fc1f1ba168724771e38bee37d940d2865cb0f562380a1fb1ffb428b75cb692"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:281309265596e388ef483250db3640e5f414168c5a67e9c665cafce9492eda2f"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:60989127da422b74a04345096c10d416c2b41bd7bf2a380eb541059e4e999980"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:246b123cc54bb5361588acc54218c8c9fb73068bf227a4a531d8ed56fa3ca7d6"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:34eccd14566f8fe14b2b95bb13b11572f7c7d5c36da61caf414d23b91fcc5d94"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18d0ef97766055fec15b5de2c06dd8e7654705ce3e5e5eed3b6651a1d2a9a152"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d3f82c171b4ccd83bbaf35aa05e44e690113bd4f3b7b6cc54d2219b132f3ae55"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ead20f7913a9c1e894aebe47cccf9dc834e1618b7aa96155d2091a626e59c972"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ca49a8119c6cbd77375ae303b0cfd8c11f011abbbd64601167ecca18a87e7cdd"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:323ba25b92454adb36fa425dc5cf6f8f19f78948cbad2e7bc6cdf7b0d7982e59"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:1236ed0952fbd919c100bc839eaa4a39ebc397ed1c08a97fc45fee2a595aa1b3"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:729177eaf0aefca0994ce4cffe96ad3c75e377c7b6f4efa59ebf003b6d398716"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-win32.whl", hash = "sha256:804d99b24ad523a1fe18cc707bf741670332f7c7412e9d49cb5eab67e886b9b5"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-win_amd64.whl", hash = "sha256:a6cdcc3ede532f4a4b96000b6362099591ab4a3e913d70bcbac2b56c872446f7"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:72dffbd8b4194858d0941062a9766f8297e8868e1dd07a7b36212aaa90f49472"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:30dcc86377618a4c8f3b72418df92e77be4254d8f89f14b8e8f57d6d43603c0f"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31a34c508c003a4347d389a9e6fcc2307cc2150eb516462a7a17512130de109e"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:15208be1c50b99203fe88d15695f22a5bed95ab3f84354c494bcb1d08557df67"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1873aade94b74715be2246321c8650cabf5a0d098a95bab81145ffffa4c13876"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a58c98a7e9c021f357348867f537017057c2ed7f77337fd914d0bedb35dace7"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4686818798f9194d03c9129a4d9a702d9e113a89cb03bffe08c6cf799e053291"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ebdc36bea43063116f0486869652cb2ed7032dbc59fbcb4445c4862b5c1ecf7f"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:ca08decd2697fdea0aea364b370b1249d47336aec935f87b8bbfd7da5b2ee9c1"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ac05fb791acf5e1a3e39402641827780fe44d27e72567a000412c648a85ba860"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-win32.whl", hash = "sha256:9dba73be7305b399924709b91682299794887cbbd88e38226ed9f6712eabee90"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-win_amd64.whl", hash = 
"sha256:f7ae5d65ccfbebdfa761585228eb4d0df3a8b15cfb53bd953e713e09fbb12957"}, +] + +[[package]] +name = "pytest" +version = "7.4.3" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-7.4.3-py3-none-any.whl", hash = "sha256:0d009c083ea859a71b76adf7c1d502e4bc170b80a8ef002da5806527b9591fac"}, + {file = "pytest-7.4.3.tar.gz", hash = "sha256:d989d136982de4e3b29dabcc838ad581c64e8ed52c11fbe86ddebd9da0818cd5"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "types-psycopg2" +version = "2.9.21.19" +description = "Typing stubs for psycopg2" +optional = false +python-versions = ">=3.7" +files = [ + {file = "types-psycopg2-2.9.21.19.tar.gz", hash = "sha256:ec3aae522dde9c41141597bc41123b4c955fb4093b1fc7ec6ee607795a0a088f"}, + {file = "types_psycopg2-2.9.21.19-py3-none-any.whl", hash = "sha256:8a4871df20c29e516622be8d66b91814c3262ff94112ff9e2f72a043d8fdf03c"}, +] + +[[package]] +name = "typing-extensions" +version = "4.8.0" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +files = [ + {file = "typing_extensions-4.8.0-py3-none-any.whl", hash = "sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0"}, + {file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"}, +] + +[metadata] +lock-version = "2.0" +python-versions = "^3.11" +content-hash = "bdaa71e5b91e5b90cc4da6748f061c503660fe30c28baebe08774e02404358dc" diff --git a/consumer/pyproject.toml b/consumer/pyproject.toml index 1045eb4..64d7d70 100644 --- a/consumer/pyproject.toml +++ b/consumer/pyproject.toml @@ -6,9 +6,17 @@ authors = ["alexau "] readme = "README.md" [tool.poetry.dependencies] -python = "^3.12" +python = "^3.11" +pika = "^1.3.2" +psycopg2-binary = "^2.9.9" +types-psycopg2 = "^2.9.21.19" +[tool.poetry.group.dev.dependencies] +pytest = "^7.4.3" +coverage = "^7.3.2" +mypy = "^1.7.1" + [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" diff --git a/consumer/requirements-dev.txt b/consumer/requirements-dev.txt new file mode 100644 index 0000000..1d619b5 --- /dev/null +++ b/consumer/requirements-dev.txt @@ -0,0 +1,12 @@ +colorama==0.4.6 ; python_version >= "3.11" and python_version < "4.0" and sys_platform == "win32" +coverage==7.3.2 ; python_version >= "3.11" and python_version < "4.0" +iniconfig==2.0.0 ; python_version >= "3.11" and python_version < "4.0" +mypy-extensions==1.0.0 ; python_version >= "3.11" and python_version < "4.0" +mypy==1.7.1 ; python_version >= "3.11" and python_version < "4.0" +packaging==23.2 ; python_version >= "3.11" and python_version < "4.0" +pika==1.3.2 ; python_version >= "3.11" and python_version < "4.0" +pluggy==1.3.0 ; python_version >= "3.11" and python_version < "4.0" +psycopg2-binary==2.9.9 ; python_version >= "3.11" and python_version < "4.0" +pytest==7.4.3 ; python_version >= "3.11" and python_version < "4.0" +types-psycopg2==2.9.21.19 ; python_version >= "3.11" and python_version < "4.0" +typing-extensions==4.8.0 ; python_version >= "3.11" and python_version < "4.0" diff --git a/consumer/requirements.txt b/consumer/requirements.txt new file mode 100644 
index 0000000..6bfaac6 --- /dev/null +++ b/consumer/requirements.txt @@ -0,0 +1,3 @@ +pika==1.3.2 ; python_version >= "3.11" and python_version < "4.0" +psycopg2-binary==2.9.9 ; python_version >= "3.11" and python_version < "4.0" +types-psycopg2==2.9.21.19 ; python_version >= "3.11" and python_version < "4.0" diff --git a/consumer/reuqirements-dev.txt b/consumer/reuqirements-dev.txt new file mode 100644 index 0000000..6ed92fd --- /dev/null +++ b/consumer/reuqirements-dev.txt @@ -0,0 +1,2 @@ +pika==1.3.2 ; python_version >= "3.11" and python_version < "4.0" +psycopg2-binary==2.9.9 ; python_version >= "3.11" and python_version < "4.0" diff --git a/producer/src/adapters/upsert_filenames/__init__.py b/consumer/src/adapters/__init__.py similarity index 100% rename from producer/src/adapters/upsert_filenames/__init__.py rename to consumer/src/adapters/__init__.py diff --git a/consumer/src/adapters/fetch_filenames/__init__.py b/consumer/src/adapters/fetch_filenames/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/consumer/src/adapters/fetch_filenames/rabbitmq.py b/consumer/src/adapters/fetch_filenames/rabbitmq.py new file mode 100644 index 0000000..3934a53 --- /dev/null +++ b/consumer/src/adapters/fetch_filenames/rabbitmq.py @@ -0,0 +1,88 @@ +from contextlib import contextmanager +from usecases import FetchFilenameClient +import pika +from typing import Iterator, Optional, TYPE_CHECKING, final +from typing_extensions import override +from collections.abc import Callable +import logging + +if TYPE_CHECKING: + from pika.adapters.blocking_connection import BlockingChannel + from pika.spec import Basic, BasicProperties + from pika.connection import Connection + + +@final +class RabbitMQFetchFilenamesClient(FetchFilenameClient): + def __init__( + self, + host: str, + port: int, + credentials_service: Callable[[], tuple[str, str]], + queue: str = "filenames", + polling_timeout: int = 10, + ) -> None: + self._host = host + self._port = port + self._credentials_service = credentials_service + self._queue = queue + self._conn: Optional[Connection] = None + self._polling_timeout = polling_timeout + + def _reset_conn(self) -> None: + self._conn = None + + @contextmanager + def _get_amqp_conn(self) -> Iterator[pika.BaseConnection]: + if self._conn is None or self._conn.is_closed: + username, password = self._credentials_service() + credentials = pika.PlainCredentials(username, password) + conn_parameters = pika.ConnectionParameters( + host=self._host, + port=self._port, + credentials=credentials, + ) + self._conn = pika.BlockingConnection(conn_parameters) + yield self._conn + + @override + def fetch(self) -> Iterator[str]: + while True: + try: + with self._get_amqp_conn() as connection: + channel: BlockingChannel = connection.channel() + channel.queue_declare(queue=self._queue, durable=True) + + method: Optional[Basic.Deliver] + properties: Optional[BasicProperties] + body: Optional[bytes] + for method, properties, body in channel.consume( + queue=self._queue, inactivity_timeout=self._polling_timeout + ): + if method == None and properties == None and body == None: + raise StopIteration + try: + yield body.decode("utf-8") + channel.basic_ack(delivery_tag=method.delivery_tag) + except Exception as e: + logging.exception(e) + channel.basic_nack(delivery_tag=method.delivery_tag) + raise e + except StopIteration: + logging.info("No more filenames to fetch") + break + except Exception as e: + logging.exception(e) + self._reset_conn() + raise e + + @override + def close(self) -> bool: + try: + 
if self._conn is not None: + self._conn.close() + return True + return False + except Exception as e: + logging.exception(e) + return False diff --git a/consumer/src/adapters/file_parse_iot_records/__init__.py b/consumer/src/adapters/file_parse_iot_records/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/consumer/src/adapters/file_parse_iot_records/csv.py b/consumer/src/adapters/file_parse_iot_records/csv.py new file mode 100644 index 0000000..2b08191 --- /dev/null +++ b/consumer/src/adapters/file_parse_iot_records/csv.py @@ -0,0 +1,97 @@ +from concurrent.futures import ThreadPoolExecutor +from datetime import datetime +from decimal import Decimal +from typing import Iterator, Optional, overload, Sequence, final, TYPE_CHECKING +from typing_extensions import override +from entities import IOTRecord +from usecases import FileParseIOTRecordsClient +import csv +import logging + + +@final +class CSVParseIOTRecordsClient(FileParseIOTRecordsClient): + def __init__( + self, + recognized_datetime_formats: Sequence[str], + delimiter: str = ",", + ) -> None: + self._delimiter = delimiter + self._recognized_datetime_formats = recognized_datetime_formats + + @overload + def parse(self, filename: str) -> list[IOTRecord]: + ... + + @overload + def parse(self, filename: Sequence[str]) -> list[list[IOTRecord]]: + ... + + @override + def parse( + self, filename: str | Sequence[str] + ) -> list[IOTRecord] | list[list[IOTRecord]]: + if isinstance(filename, str): + return self._parse_single(filename) + return self._parse_batch(filename) + + @override + def parse_stream(self, filename: str) -> Iterator[IOTRecord]: + try: + with open(filename) as csvfile: + reader = csv.reader(csvfile, delimiter=self._delimiter) + yield from self._parse_iter(reader) + except Exception as e: + logging.exception(e) + + def _parse_datetime(self, datetime_str: str) -> Optional[datetime]: + for datetime_format in self._recognized_datetime_formats: + try: + return datetime.strptime(datetime_str, datetime_format) + except ValueError: + pass + return None + + def _parse_value(self, value_str: str) -> Optional[Decimal]: + try: + return Decimal(value_str) + except (ValueError, ArithmeticError): + return None + + def _parse_iter(self, reader: Iterator[list[str]]) -> Iterator[IOTRecord]: + iot_records: list[IOTRecord] = [] + for row in reader: + try: + parsed_datetime = self._parse_datetime(row[0]) + if parsed_datetime is None: + raise ValueError(f"Unrecognized datetime format: {row[0]}") + + parsed_value = self._parse_value(row[2]) + if parsed_value is None: + raise ValueError(f"Unrecognized value format: {row[2]}") + + yield IOTRecord( + record_time=parsed_datetime, + sensor_id=str(row[1]), + value=parsed_value, + ) + except Exception as e: + logging.exception(e) + return iot_records + + def _parse_single(self, filename: str) -> list[IOTRecord]: + try: + with open(filename) as csvfile: + reader = csv.reader(csvfile, delimiter=self._delimiter) + return list(self._parse_iter(reader)) + except Exception as e: + logging.exception(e) + return [] + + def _parse_batch(self, filenames: Sequence[str]) -> list[list[IOTRecord]]: + with ThreadPoolExecutor() as executor: + return list(executor.map(self._parse_single, filenames)) + + @override + def close(self) -> bool: + return True diff --git a/consumer/src/adapters/upsert_iot_records/__init__.py b/consumer/src/adapters/upsert_iot_records/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/consumer/src/adapters/upsert_iot_records/postgres.py 
b/consumer/src/adapters/upsert_iot_records/postgres.py new file mode 100644 index 0000000..bfe28d9 --- /dev/null +++ b/consumer/src/adapters/upsert_iot_records/postgres.py @@ -0,0 +1,138 @@ +from contextlib import contextmanager +import logging +from typing import Iterator, Optional, Sequence, overload, TYPE_CHECKING, final, TypeVar +from typing_extensions import override +import psycopg2 +from usecases import UpsertIOTRecordsClient +from entities import IOTRecord +from collections.abc import Callable + +if TYPE_CHECKING: + from psycopg2.extensions import connection + +T = TypeVar("T") + + +@final +class PostgresUpsertIOTRecordsClient(UpsertIOTRecordsClient): + def __init__( + self, + host: str, + port: int, + credentials_service: Callable[[], tuple[str, str]], + database: str, + batch_upsert_size: int = 1000, + ) -> None: + self._host = host + self._port = port + self._credentials_service = credentials_service + self._database = database + self._batch_upsert_size = batch_upsert_size + self._conn: Optional[connection] = None + + @overload + def upsert(self, iot_record: IOTRecord) -> bool: + ... + + @overload + def upsert(self, iot_record: Sequence[IOTRecord]) -> list[bool]: + ... + + @override + def upsert(self, iot_record: IOTRecord | Sequence[IOTRecord]) -> bool | list[bool]: + if isinstance(iot_record, IOTRecord): + return self._upsert_single(iot_record) + return self._upsert_batch(iot_record) + + def _reset_conn(self) -> None: + self._conn = None + + @contextmanager + def _get_conn(self) -> Iterator[connection]: + if self._conn is None or self._conn.closed: + username, password = self._credentials_service() + self._conn = psycopg2.connect( + host=self._host, + port=self._port, + user=username, + password=password, + database=self._database, + ) + yield self._conn + + def _get_sql_stmt(self) -> str: + stmt = """ + INSERT INTO records( + record_time, + sensor_id, + value + ) VALUES ( + %(record_time)s, + %(sensor_id)s, + %(value)s + ) ON CONFLICT (record_time, sensor_id) DO UPDATE SET + value = EXCLUDED.value + """ + return stmt + + def _transform_iot_record_to_sql_dict( + self, + iot_record: IOTRecord, + ) -> dict: + return { + "record_time": iot_record.record_time, + "sensor_id": iot_record.sensor_id, + "value": iot_record.value, + } + + def _batch_generator( + self, + iterable: Sequence[T], + batch_size: int, + ) -> Iterator[Sequence[T]]: + for i in range(0, len(iterable), batch_size): + yield iterable[i : i + batch_size] + + def _upsert_single(self, iot_record: IOTRecord) -> bool: + try: + with self._get_conn() as conn, conn.cursor() as cursor: + try: + cursor.execute( + self._get_sql_stmt(), + self._transform_iot_record_to_sql_dict(iot_record), + ) + conn.commit() + return True + except Exception as e: + conn.rollback() + logging.exception(e) + return False + except Exception as e: + logging.exception(e) + self._reset_conn() + return False + + def _upsert_batch(self, iot_records: Sequence[IOTRecord]) -> list[bool]: + successes: list[bool] = [] + for batch in self._batch_generator(iot_records, self._batch_upsert_size): + try: + with self._get_conn() as conn, conn.cursor() as cursor: + try: + cursor.executemany( + self._get_sql_stmt(), + [ + self._transform_iot_record_to_sql_dict(iot_record) + for iot_record in batch + ], + ) + conn.commit() + successes.extend([True] * len(batch)) + except Exception as e: + conn.rollback() + logging.exception(e) + successes.extend([False] * len(batch)) + except Exception as e: + logging.exception(e) + self._reset_conn() + successes.extend([False] * 
len(batch)) + return successes diff --git a/consumer/src/deployments/__init__.py b/consumer/src/deployments/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/consumer/src/deployments/scripts/__init__.py b/consumer/src/deployments/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/consumer/src/deployments/scripts/config.py b/consumer/src/deployments/scripts/config.py new file mode 100644 index 0000000..071018d --- /dev/null +++ b/consumer/src/deployments/scripts/config.py @@ -0,0 +1,37 @@ +import os + + +class LoggingConfig: + LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO") + LOG_FORMAT = os.getenv( + "LOG_FORMAT", "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) + LOG_DATE_FORMAT = os.getenv("LOG_DATE_FORMAT", "%Y-%m-%d %H:%M:%S") + LOG_DIR = os.getenv("LOG_DIR", "/tmp") + LOG_RETENTION = int(os.getenv("LOG_RETENTION", "7")) + LOG_ROTATION = os.getenv("LOG_ROTATION", "midnight") + + +class RabbitMQConfig: + HOST = os.getenv("RABBITMQ_HOST", "localhost") + PORT = int(os.getenv("RABBITMQ_PORT", 5672)) + USERNAME = os.getenv("RABBITMQ_USERNAME", "guest") + PASSWORD = os.getenv("RABBITMQ_PASSWORD", "guest") + QUEUE = os.getenv("RABBITMQ_QUEUE", "filenames") + POLLING_TIMEOUT = int(os.getenv("RABBITMQ_POLLING_TIMEOUT", 600)) + + +class PostgresConfig: + HOST = os.getenv("POSTGRES_HOST", "localhost") + PORT = int(os.getenv("POSTGRES_PORT", 5432)) + USERNAME = os.getenv("POSTGRES_USERNAME", "postgres") + PASSWORD = os.getenv("POSTGRES_PASSWORD", "postgres") + DATABASE = os.getenv("POSTGRES_DATABASE", "postgres") + BATCH_UPSERT_SIZE = int(os.getenv("POSTGRES_BATCH_UPSERT_SIZE", 1000)) + + +class CSVParserConfig: + RECOGNIZED_DATETIME_FORMATS = os.getenv( + "CSV_PARSER_RECOGNIZED_DATETIME_FORMATS", "" + ).split(",") + DELIMITER = os.getenv("CSV_PARSER_DELIMITER", ",") diff --git a/consumer/src/deployments/scripts/main.py b/consumer/src/deployments/scripts/main.py new file mode 100644 index 0000000..0b067f7 --- /dev/null +++ b/consumer/src/deployments/scripts/main.py @@ -0,0 +1,57 @@ +from adapters.fetch_filenames.rabbitmq import RabbitMQFetchFilenamesClient +from adapters.file_parse_iot_records.csv import CSVParseIOTRecordsClient +from adapters.upsert_iot_records.postgres import PostgresUpsertIOTRecordsClient +from config import RabbitMQConfig, PostgresConfig, CSVParserConfig +from setup_logging import setup_logging +import logging +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from entities import IOTRecord + +setup_logging() + +fetch_filenames_client = RabbitMQFetchFilenamesClient( + host=RabbitMQConfig.HOST, + port=RabbitMQConfig.PORT, + credentials_service=lambda: (RabbitMQConfig.USERNAME, RabbitMQConfig.PASSWORD), + queue=RabbitMQConfig.QUEUE, + polling_timeout=RabbitMQConfig.POLLING_TIMEOUT, +) + +file_parse_iot_records_client = CSVParseIOTRecordsClient( + recognized_datetime_formats=CSVParserConfig.RECOGNIZED_DATETIME_FORMATS, + delimiter=CSVParserConfig.DELIMITER, +) + +upsert_iot_records_client = PostgresUpsertIOTRecordsClient( + host=PostgresConfig.HOST, + port=PostgresConfig.PORT, + credentials_service=lambda: (PostgresConfig.USERNAME, PostgresConfig.PASSWORD), + database=PostgresConfig.DATABASE, + batch_upsert_size=PostgresConfig.BATCH_UPSERT_SIZE, +) + + +def main() -> None: + filestream_buffer: list[IOTRecord] = [] + try: + for filename in fetch_filenames_client.fetch(): + for iot_record in file_parse_iot_records_client.parse_stream(filename): + filestream_buffer.append(iot_record) + if len(filestream_buffer) >= PostgresConfig.BATCH_UPSERT_SIZE: 
upsert_iot_records_client.upsert(filestream_buffer) + filestream_buffer.clear() + if filestream_buffer: + upsert_iot_records_client.upsert(filestream_buffer) + filestream_buffer.clear() + except Exception as e: + logging.exception(e) + raise e + finally: + fetch_filenames_client.close() + upsert_iot_records_client.close() + + +if __name__ == "__main__": + main() diff --git a/consumer/src/deployments/scripts/setup_logging.py b/consumer/src/deployments/scripts/setup_logging.py new file mode 100644 index 0000000..161394c --- /dev/null +++ b/consumer/src/deployments/scripts/setup_logging.py @@ -0,0 +1,70 @@ +import logging +from logging.handlers import TimedRotatingFileHandler +from config import LoggingConfig +import pathlib + + +def setup_logging() -> None: + LOG_LEVEL_INT = getattr(logging, LoggingConfig.LOG_LEVEL.upper(), None) + + pathlib.Path(LoggingConfig.LOG_DIR).mkdir(parents=True, exist_ok=True) + + handlers: list[logging.Handler] = [] + + stream_handler = logging.StreamHandler() + stream_handler.setFormatter( + logging.Formatter( + LoggingConfig.LOG_FORMAT, datefmt=LoggingConfig.LOG_DATE_FORMAT + ) + ) + stream_handler.setLevel(LoggingConfig.LOG_LEVEL) + handlers.append(stream_handler) + + if LOG_LEVEL_INT is not None and LOG_LEVEL_INT <= logging.INFO: + info_handler = TimedRotatingFileHandler( + filename=f"{LoggingConfig.LOG_DIR}/info.log", + when=LoggingConfig.LOG_ROTATION, + interval=1, + backupCount=LoggingConfig.LOG_RETENTION, + ) + info_handler.setFormatter( + logging.Formatter( + LoggingConfig.LOG_FORMAT, datefmt=LoggingConfig.LOG_DATE_FORMAT + ) + ) + info_handler.setLevel(logging.INFO) + handlers.append(info_handler) + + if LOG_LEVEL_INT is not None and LOG_LEVEL_INT <= logging.WARNING: + warning_handler = TimedRotatingFileHandler( + filename=f"{LoggingConfig.LOG_DIR}/warning.log", + when=LoggingConfig.LOG_ROTATION, + interval=1, + backupCount=LoggingConfig.LOG_RETENTION, + ) + warning_handler.setFormatter( + logging.Formatter( + LoggingConfig.LOG_FORMAT, datefmt=LoggingConfig.LOG_DATE_FORMAT + ) + ) + warning_handler.setLevel(logging.WARNING) + handlers.append(warning_handler) + + if LOG_LEVEL_INT is not None and LOG_LEVEL_INT <= logging.ERROR: + error_handler = TimedRotatingFileHandler( + filename=f"{LoggingConfig.LOG_DIR}/error.log", + when=LoggingConfig.LOG_ROTATION, + interval=1, + backupCount=LoggingConfig.LOG_RETENTION, + ) + error_handler.setFormatter( + logging.Formatter( + LoggingConfig.LOG_FORMAT, datefmt=LoggingConfig.LOG_DATE_FORMAT + ) + ) + error_handler.setLevel(logging.ERROR) + handlers.append(error_handler) + + root_logger = logging.getLogger() + root_logger.setLevel(LoggingConfig.LOG_LEVEL) + root_logger.handlers = handlers diff --git a/consumer/src/entities/__init__.py b/consumer/src/entities/__init__.py new file mode 100644 index 0000000..7316450 --- /dev/null +++ b/consumer/src/entities/__init__.py @@ -0,0 +1 @@ +from .iot_record import IOTRecord diff --git a/consumer/src/entities/iot_record.py b/consumer/src/entities/iot_record.py new file mode 100644 index 0000000..0ef3748 --- /dev/null +++ b/consumer/src/entities/iot_record.py @@ -0,0 +1,10 @@ +from dataclasses import dataclass +from datetime import datetime +from decimal import Decimal + + +@dataclass +class IOTRecord: + record_time: datetime + sensor_id: str + value: Decimal diff --git a/consumer/src/usecases/__init__.py b/consumer/src/usecases/__init__.py new file mode 100644 index 0000000..4265028 --- /dev/null +++ b/consumer/src/usecases/__init__.py @@ -0,0 +1,3 @@ +from .fetch_filenames import 
FetchFilenameClient +from .file_parse_iot_records import FileParseIOTRecordsClient +from .upsert_iot_records import UpsertIOTRecordsClient diff --git a/consumer/src/usecases/fetch_filenames.py b/consumer/src/usecases/fetch_filenames.py new file mode 100644 index 0000000..39f0594 --- /dev/null +++ b/consumer/src/usecases/fetch_filenames.py @@ -0,0 +1,12 @@ +from abc import ABC, abstractmethod +from typing import Iterator + + +class FetchFilenameClient(ABC): + @abstractmethod + def fetch(self) -> Iterator[str]: + ... + + @abstractmethod + def close(self) -> bool: + ... diff --git a/consumer/src/usecases/file_parse_iot_records.py b/consumer/src/usecases/file_parse_iot_records.py new file mode 100644 index 0000000..5005f4b --- /dev/null +++ b/consumer/src/usecases/file_parse_iot_records.py @@ -0,0 +1,27 @@ +from abc import ABC, abstractmethod +from typing import Iterator, overload, Sequence +from entities import IOTRecord + + +class FileParseIOTRecordsClient(ABC): + @overload + def parse(self, filename: str) -> list[IOTRecord]: + ... + + @overload + def parse(self, filename: Sequence[str]) -> list[list[IOTRecord]]: + ... + + @abstractmethod + def parse( + self, filename: str | Sequence[str] + ) -> list[IOTRecord] | list[list[IOTRecord]]: + ... + + @abstractmethod + def parse_stream(self, filename: str) -> Iterator[IOTRecord]: + ... + + @abstractmethod + def close(self) -> bool: + ... diff --git a/consumer/src/usecases/upsert_iot_records.py b/consumer/src/usecases/upsert_iot_records.py new file mode 100644 index 0000000..7c6b5f7 --- /dev/null +++ b/consumer/src/usecases/upsert_iot_records.py @@ -0,0 +1,21 @@ +from abc import ABC, abstractmethod +from typing import overload, Sequence +from entities import IOTRecord + + +class UpsertIOTRecordsClient(ABC): + @overload + def upsert(self, iot_record: IOTRecord) -> bool: + ... + + @overload + def upsert(self, iot_record: Sequence[IOTRecord]) -> list[bool]: + ... + + @abstractmethod + def upsert(self, iot_record: IOTRecord | Sequence[IOTRecord]) -> bool | list[bool]: + ... + + @abstractmethod + def close(self) -> bool: + ... 
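The consumer wires these three ports together in deployments/scripts/main.py: filenames are streamed from RabbitMQ, each file is parsed into IOTRecord objects, and the records are flushed to Postgres in batches. A minimal, self-contained sketch of that composition, using hypothetical in-memory fakes (FakeFetchFilenames, FakeParser, FakeUpserter) in place of the RabbitMQ, CSV, and Postgres adapters:

# Sketch only: fakes stand in for the real adapters defined in this patch.
from dataclasses import dataclass
from datetime import datetime
from decimal import Decimal
from typing import Iterator, Sequence


@dataclass
class IOTRecord:  # mirrors consumer/src/entities/iot_record.py
    record_time: datetime
    sensor_id: str
    value: Decimal


class FakeFetchFilenames:  # stand-in for RabbitMQFetchFilenamesClient
    def fetch(self) -> Iterator[str]:
        yield from ("a.csv", "b.csv")

    def close(self) -> bool:
        return True


class FakeParser:  # stand-in for CSVParseIOTRecordsClient
    def parse_stream(self, filename: str) -> Iterator[IOTRecord]:
        for i in range(3):
            yield IOTRecord(datetime.now(), f"{filename}-sensor-{i}", Decimal("1.5"))


class FakeUpserter:  # stand-in for PostgresUpsertIOTRecordsClient
    def upsert(self, records: Sequence[IOTRecord]) -> list[bool]:
        print(f"upserting {len(records)} records")
        return [True] * len(records)

    def close(self) -> bool:
        return True


def run(batch_size: int = 4) -> None:
    fetcher, parser, upserter = FakeFetchFilenames(), FakeParser(), FakeUpserter()
    buffer: list[IOTRecord] = []
    try:
        for filename in fetcher.fetch():
            for record in parser.parse_stream(filename):
                buffer.append(record)
                if len(buffer) >= batch_size:  # flush a full batch
                    upserter.upsert(buffer)
                    buffer.clear()
        if buffer:  # flush the remainder
            upserter.upsert(buffer)
    finally:
        fetcher.close()
        upserter.close()


if __name__ == "__main__":
    run()

The batch_size argument here is a stand-in for PostgresConfig.BATCH_UPSERT_SIZE; the real main() reads that value from the environment via config.py.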
diff --git a/docker-compose.yml b/docker-compose.yml index b2e41d1..6d94f27 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -30,18 +30,47 @@ services: container_name: records_producer build: context: producer - dockerfile: dockerfile.prod + dockerfile: dockerfile args: AMAZON_LINUX_VERSION_TAG: ${AMAZON_LINUX_VERSION_TAG} environment: TARGET_FILE_DIR: ${TARGET_FILE_DIR} TARGET_FILE_EXTENSION: ${TARGET_FILE_EXTENSION} - LOG_LEVEL: ${LOG_LEVEL} - LOG_FORMAT: ${LOG_FORMAT} - LOG_DATE_FORMAT: ${LOG_DATE_FORMAT} + LOG_LEVEL: ${PRODUCER_LOG_LEVEL} + LOG_FORMAT: ${PRODUCER_LOG_FORMAT} + LOG_DATE_FORMAT: ${PRODUCER_LOG_DATE_FORMAT} + LOG_DIR: ${PRODUCER_LOG_DIR} + LOG_RETENTION: ${PRODUCER_LOG_RETENTION} + LOG_ROTATION: ${PRODUCER_LOG_ROTATION} + RABBITMQ_HOST: records_rabbitmq + RABBITMQ_PORT: 5672 + RABBITMQ_USER: ${RABBITMQ_USER} + RABBITMQ_PASSWORD: ${RABBITMQ_PASSWORD} + RABBITMQ_QUEUE: ${QUEUE_NAME} + records_consumer: + image: records_consumer:latest + build: + context: consumer + dockerfile: dockerfile + args: + AMAZON_LINUX_VERSION_TAG: ${AMAZON_LINUX_VERSION_TAG} + environment: + LOG_LEVEL: ${CONSUMER_LOG_LEVEL} + LOG_FORMAT: ${CONSUMER_LOG_FORMAT} + LOG_DATE_FORMAT: ${CONSUMER_LOG_DATE_FORMAT} + LOG_DIR: ${CONSUMER_LOG_DIR} + LOG_RETENTION: ${CONSUMER_LOG_RETENTION} + LOG_ROTATION: ${CONSUMER_LOG_ROTATION} RABBITMQ_HOST: records_rabbitmq - RABBITMQ_PORT: ${RABBITMQ_PORT} + RABBITMQ_PORT: 5672 RABBITMQ_USER: ${RABBITMQ_USER} RABBITMQ_PASSWORD: ${RABBITMQ_PASSWORD} RABBITMQ_QUEUE: ${QUEUE_NAME} - + POSTGRES_HOST: records_postgres + POSTGRES_PORT: 5432 + POSTGRES_USER: ${POSTGRES_USER} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} + POSTGRES_DATABASE: ${POSTGRES_DATABASE} + POSTGRES_BATCH_UPSERT_SIZE: ${POSTGRES_BATCH_UPSERT_SIZE} + CSV_PARSER_RECOGNIZED_DATETIME_FORMATS: ${CSV_PARSER_RECOGNIZED_DATETIME_FORMATS} + CSV_PARSER_DELIMITER: ${CSV_PARSER_DELIMITER} diff --git a/producer/dockerfile b/producer/dockerfile new file mode 100644 index 0000000..b343433 --- /dev/null +++ b/producer/dockerfile @@ -0,0 +1,36 @@ +ARG AMAZON_LINUX_VERSION_TAG +FROM amazonlinux:${AMAZON_LINUX_VERSION_TAG} as build +RUN yum update -y && \ + yum install -y \ + python3.11 \ + python3-pip \ + python3-devel \ + shadow-utils && \ + yum clean all + +RUN adduser app + +USER app +ENV HOME=/home/app +WORKDIR ${HOME} + +COPY requirements.txt . +RUN pip3 install --user -r requirements.txt + +FROM amazonlinux:${AMAZON_LINUX_VERSION_TAG} as runtime +RUN yum update -y && \ + yum install -y \ + python3.11 \ + python3-pip \ + shadow-utils && \ + yum clean all +RUN adduser app + +USER app +ENV HOME=/home/app +WORKDIR ${HOME} + +COPY --from=build ${HOME}/.local ${HOME}/.local + +COPY src/ . +CMD python3.11 -m deployments.script.main diff --git a/producer/poetry.lock b/producer/poetry.lock index c798a25..2224d04 100644 --- a/producer/poetry.lock +++ b/producer/poetry.lock @@ -1,5 +1,159 @@ # This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." 
+optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "coverage" +version = "7.3.2" +description = "Code coverage measurement for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "coverage-7.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d872145f3a3231a5f20fd48500274d7df222e291d90baa2026cc5152b7ce86bf"}, + {file = "coverage-7.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:310b3bb9c91ea66d59c53fa4989f57d2436e08f18fb2f421a1b0b6b8cc7fffda"}, + {file = "coverage-7.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f47d39359e2c3779c5331fc740cf4bce6d9d680a7b4b4ead97056a0ae07cb49a"}, + {file = "coverage-7.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aa72dbaf2c2068404b9870d93436e6d23addd8bbe9295f49cbca83f6e278179c"}, + {file = "coverage-7.3.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:beaa5c1b4777f03fc63dfd2a6bd820f73f036bfb10e925fce067b00a340d0f3f"}, + {file = "coverage-7.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:dbc1b46b92186cc8074fee9d9fbb97a9dd06c6cbbef391c2f59d80eabdf0faa6"}, + {file = "coverage-7.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:315a989e861031334d7bee1f9113c8770472db2ac484e5b8c3173428360a9148"}, + {file = "coverage-7.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d1bc430677773397f64a5c88cb522ea43175ff16f8bfcc89d467d974cb2274f9"}, + {file = "coverage-7.3.2-cp310-cp310-win32.whl", hash = "sha256:a889ae02f43aa45032afe364c8ae84ad3c54828c2faa44f3bfcafecb5c96b02f"}, + {file = "coverage-7.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:c0ba320de3fb8c6ec16e0be17ee1d3d69adcda99406c43c0409cb5c41788a611"}, + {file = "coverage-7.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ac8c802fa29843a72d32ec56d0ca792ad15a302b28ca6203389afe21f8fa062c"}, + {file = "coverage-7.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:89a937174104339e3a3ffcf9f446c00e3a806c28b1841c63edb2b369310fd074"}, + {file = "coverage-7.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e267e9e2b574a176ddb983399dec325a80dbe161f1a32715c780b5d14b5f583a"}, + {file = "coverage-7.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2443cbda35df0d35dcfb9bf8f3c02c57c1d6111169e3c85fc1fcc05e0c9f39a3"}, + {file = "coverage-7.3.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4175e10cc8dda0265653e8714b3174430b07c1dca8957f4966cbd6c2b1b8065a"}, + {file = "coverage-7.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0cbf38419fb1a347aaf63481c00f0bdc86889d9fbf3f25109cf96c26b403fda1"}, + {file = "coverage-7.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:5c913b556a116b8d5f6ef834038ba983834d887d82187c8f73dec21049abd65c"}, + {file = "coverage-7.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1981f785239e4e39e6444c63a98da3a1db8e971cb9ceb50a945ba6296b43f312"}, + {file = "coverage-7.3.2-cp311-cp311-win32.whl", hash = 
"sha256:43668cabd5ca8258f5954f27a3aaf78757e6acf13c17604d89648ecc0cc66640"}, + {file = "coverage-7.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10c39c0452bf6e694511c901426d6b5ac005acc0f78ff265dbe36bf81f808a2"}, + {file = "coverage-7.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:4cbae1051ab791debecc4a5dcc4a1ff45fc27b91b9aee165c8a27514dd160836"}, + {file = "coverage-7.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12d15ab5833a997716d76f2ac1e4b4d536814fc213c85ca72756c19e5a6b3d63"}, + {file = "coverage-7.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c7bba973ebee5e56fe9251300c00f1579652587a9f4a5ed8404b15a0471f216"}, + {file = "coverage-7.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fe494faa90ce6381770746077243231e0b83ff3f17069d748f645617cefe19d4"}, + {file = "coverage-7.3.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6e9589bd04d0461a417562649522575d8752904d35c12907d8c9dfeba588faf"}, + {file = "coverage-7.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d51ac2a26f71da1b57f2dc81d0e108b6ab177e7d30e774db90675467c847bbdf"}, + {file = "coverage-7.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:99b89d9f76070237975b315b3d5f4d6956ae354a4c92ac2388a5695516e47c84"}, + {file = "coverage-7.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fa28e909776dc69efb6ed975a63691bc8172b64ff357e663a1bb06ff3c9b589a"}, + {file = "coverage-7.3.2-cp312-cp312-win32.whl", hash = "sha256:289fe43bf45a575e3ab10b26d7b6f2ddb9ee2dba447499f5401cfb5ecb8196bb"}, + {file = "coverage-7.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:7dbc3ed60e8659bc59b6b304b43ff9c3ed858da2839c78b804973f613d3e92ed"}, + {file = "coverage-7.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f94b734214ea6a36fe16e96a70d941af80ff3bfd716c141300d95ebc85339738"}, + {file = "coverage-7.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:af3d828d2c1cbae52d34bdbb22fcd94d1ce715d95f1a012354a75e5913f1bda2"}, + {file = "coverage-7.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:630b13e3036e13c7adc480ca42fa7afc2a5d938081d28e20903cf7fd687872e2"}, + {file = "coverage-7.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c9eacf273e885b02a0273bb3a2170f30e2d53a6d53b72dbe02d6701b5296101c"}, + {file = "coverage-7.3.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8f17966e861ff97305e0801134e69db33b143bbfb36436efb9cfff6ec7b2fd9"}, + {file = "coverage-7.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b4275802d16882cf9c8b3d057a0839acb07ee9379fa2749eca54efbce1535b82"}, + {file = "coverage-7.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:72c0cfa5250f483181e677ebc97133ea1ab3eb68645e494775deb6a7f6f83901"}, + {file = "coverage-7.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:cb536f0dcd14149425996821a168f6e269d7dcd2c273a8bff8201e79f5104e76"}, + {file = "coverage-7.3.2-cp38-cp38-win32.whl", hash = "sha256:307adb8bd3abe389a471e649038a71b4eb13bfd6b7dd9a129fa856f5c695cf92"}, + {file = "coverage-7.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:88ed2c30a49ea81ea3b7f172e0269c182a44c236eb394718f976239892c0a27a"}, + {file = "coverage-7.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b631c92dfe601adf8f5ebc7fc13ced6bb6e9609b19d9a8cd59fa47c4186ad1ce"}, + {file = 
"coverage-7.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d3d9df4051c4a7d13036524b66ecf7a7537d14c18a384043f30a303b146164e9"}, + {file = "coverage-7.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f7363d3b6a1119ef05015959ca24a9afc0ea8a02c687fe7e2d557705375c01f"}, + {file = "coverage-7.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2f11cc3c967a09d3695d2a6f03fb3e6236622b93be7a4b5dc09166a861be6d25"}, + {file = "coverage-7.3.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:149de1d2401ae4655c436a3dced6dd153f4c3309f599c3d4bd97ab172eaf02d9"}, + {file = "coverage-7.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:3a4006916aa6fee7cd38db3bfc95aa9c54ebb4ffbfc47c677c8bba949ceba0a6"}, + {file = "coverage-7.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9028a3871280110d6e1aa2df1afd5ef003bab5fb1ef421d6dc748ae1c8ef2ebc"}, + {file = "coverage-7.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9f805d62aec8eb92bab5b61c0f07329275b6f41c97d80e847b03eb894f38d083"}, + {file = "coverage-7.3.2-cp39-cp39-win32.whl", hash = "sha256:d1c88ec1a7ff4ebca0219f5b1ef863451d828cccf889c173e1253aa84b1e07ce"}, + {file = "coverage-7.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b4767da59464bb593c07afceaddea61b154136300881844768037fd5e859353f"}, + {file = "coverage-7.3.2-pp38.pp39.pp310-none-any.whl", hash = "sha256:ae97af89f0fbf373400970c0a21eef5aa941ffeed90aee43650b81f7d7f47637"}, + {file = "coverage-7.3.2.tar.gz", hash = "sha256:be32ad29341b0170e795ca590e1c07e81fc061cb5b10c74ce7203491484404ef"}, +] + +[package.extras] +toml = ["tomli"] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "mypy" +version = "1.7.1" +description = "Optional static typing for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "mypy-1.7.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:12cce78e329838d70a204293e7b29af9faa3ab14899aec397798a4b41be7f340"}, + {file = "mypy-1.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1484b8fa2c10adf4474f016e09d7a159602f3239075c7bf9f1627f5acf40ad49"}, + {file = "mypy-1.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31902408f4bf54108bbfb2e35369877c01c95adc6192958684473658c322c8a5"}, + {file = "mypy-1.7.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f2c2521a8e4d6d769e3234350ba7b65ff5d527137cdcde13ff4d99114b0c8e7d"}, + {file = "mypy-1.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:fcd2572dd4519e8a6642b733cd3a8cfc1ef94bafd0c1ceed9c94fe736cb65b6a"}, + {file = "mypy-1.7.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4b901927f16224d0d143b925ce9a4e6b3a758010673eeded9b748f250cf4e8f7"}, + {file = "mypy-1.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2f7f6985d05a4e3ce8255396df363046c28bea790e40617654e91ed580ca7c51"}, + {file = "mypy-1.7.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:944bdc21ebd620eafefc090cdf83158393ec2b1391578359776c00de00e8907a"}, + {file = "mypy-1.7.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:9c7ac372232c928fff0645d85f273a726970c014749b924ce5710d7d89763a28"}, + {file = "mypy-1.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:f6efc9bd72258f89a3816e3a98c09d36f079c223aa345c659622f056b760ab42"}, + {file = "mypy-1.7.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6dbdec441c60699288adf051f51a5d512b0d818526d1dcfff5a41f8cd8b4aaf1"}, + {file = "mypy-1.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4fc3d14ee80cd22367caaaf6e014494415bf440980a3045bf5045b525680ac33"}, + {file = "mypy-1.7.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c6e4464ed5f01dc44dc9821caf67b60a4e5c3b04278286a85c067010653a0eb"}, + {file = "mypy-1.7.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:d9b338c19fa2412f76e17525c1b4f2c687a55b156320acb588df79f2e6fa9fea"}, + {file = "mypy-1.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:204e0d6de5fd2317394a4eff62065614c4892d5a4d1a7ee55b765d7a3d9e3f82"}, + {file = "mypy-1.7.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:84860e06ba363d9c0eeabd45ac0fde4b903ad7aa4f93cd8b648385a888e23200"}, + {file = "mypy-1.7.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8c5091ebd294f7628eb25ea554852a52058ac81472c921150e3a61cdd68f75a7"}, + {file = "mypy-1.7.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40716d1f821b89838589e5b3106ebbc23636ffdef5abc31f7cd0266db936067e"}, + {file = "mypy-1.7.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5cf3f0c5ac72139797953bd50bc6c95ac13075e62dbfcc923571180bebb662e9"}, + {file = "mypy-1.7.1-cp38-cp38-win_amd64.whl", hash = "sha256:78e25b2fd6cbb55ddfb8058417df193f0129cad5f4ee75d1502248e588d9e0d7"}, + {file = "mypy-1.7.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:75c4d2a6effd015786c87774e04331b6da863fc3fc4e8adfc3b40aa55ab516fe"}, + {file = "mypy-1.7.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2643d145af5292ee956aa0a83c2ce1038a3bdb26e033dadeb2f7066fb0c9abce"}, + {file = "mypy-1.7.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75aa828610b67462ffe3057d4d8a4112105ed211596b750b53cbfe182f44777a"}, + {file = "mypy-1.7.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ee5d62d28b854eb61889cde4e1dbc10fbaa5560cb39780c3995f6737f7e82120"}, + {file = "mypy-1.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:72cf32ce7dd3562373f78bd751f73c96cfb441de147cc2448a92c1a308bd0ca6"}, + {file = "mypy-1.7.1-py3-none-any.whl", hash = "sha256:f7c5d642db47376a0cc130f0de6d055056e010debdaf0707cd2b0fc7e7ef30ea"}, + {file = "mypy-1.7.1.tar.gz", hash = "sha256:fcb6d9afb1b6208b4c712af0dafdc650f518836065df0d4fb1d800f5d6773db2"}, +] + +[package.dependencies] +mypy-extensions = ">=1.0.0" +typing-extensions = ">=4.1.0" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +install-types = ["pip"] +mypyc = ["setuptools (>=50)"] +reports = ["lxml"] + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." 
+optional = false +python-versions = ">=3.5" +files = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] + +[[package]] +name = "packaging" +version = "23.2" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, + {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, +] + [[package]] name = "pika" version = "1.3.2" @@ -16,7 +170,53 @@ gevent = ["gevent"] tornado = ["tornado"] twisted = ["twisted"] +[[package]] +name = "pluggy" +version = "1.3.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"}, + {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pytest" +version = "7.4.3" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-7.4.3-py3-none-any.whl", hash = "sha256:0d009c083ea859a71b76adf7c1d502e4bc170b80a8ef002da5806527b9591fac"}, + {file = "pytest-7.4.3.tar.gz", hash = "sha256:d989d136982de4e3b29dabcc838ad581c64e8ed52c11fbe86ddebd9da0818cd5"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "typing-extensions" +version = "4.8.0" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +files = [ + {file = "typing_extensions-4.8.0-py3-none-any.whl", hash = "sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0"}, + {file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"}, +] + [metadata] lock-version = "2.0" -python-versions = "^3.12" -content-hash = "8c34279cc0de5600b684c04804c053018d625919406026acf7c64f821929205e" +python-versions = "^3.11" +content-hash = "efdbc1051d5965ad018cb20ee591664d4975b097fe10dc948c9a4b11126ba3d1" diff --git a/producer/pyproject.toml b/producer/pyproject.toml index 1c97623..9e04a98 100644 --- a/producer/pyproject.toml +++ b/producer/pyproject.toml @@ -6,10 +6,15 @@ authors = ["alexau "] readme = "README.md" [tool.poetry.dependencies] -python = "^3.12" +python = "^3.11" pika = "^1.3.2" +[tool.poetry.group.dev.dependencies] +pytest = "^7.4.3" +coverage = "^7.3.2" +mypy = "^1.7.1" + [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" diff --git a/producer/requirements-dev.txt b/producer/requirements-dev.txt new file mode 100644 index 0000000..04d0e8c --- /dev/null +++ b/producer/requirements-dev.txt @@ -0,0 +1,10 @@ +colorama==0.4.6 ; python_version >= "3.11" and python_version < "4.0" and sys_platform 
== "win32" +coverage==7.3.2 ; python_version >= "3.11" and python_version < "4.0" +iniconfig==2.0.0 ; python_version >= "3.11" and python_version < "4.0" +mypy-extensions==1.0.0 ; python_version >= "3.11" and python_version < "4.0" +mypy==1.7.1 ; python_version >= "3.11" and python_version < "4.0" +packaging==23.2 ; python_version >= "3.11" and python_version < "4.0" +pika==1.3.2 ; python_version >= "3.11" and python_version < "4.0" +pluggy==1.3.0 ; python_version >= "3.11" and python_version < "4.0" +pytest==7.4.3 ; python_version >= "3.11" and python_version < "4.0" +typing-extensions==4.8.0 ; python_version >= "3.11" and python_version < "4.0" diff --git a/producer/requirements.txt b/producer/requirements.txt new file mode 100644 index 0000000..68bf7d3 --- /dev/null +++ b/producer/requirements.txt @@ -0,0 +1 @@ +pika==1.3.2 ; python_version >= "3.11" and python_version < "4.0" diff --git a/producer/src/adapters/publish_filenames/__init__.py b/producer/src/adapters/publish_filenames/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/producer/src/adapters/upsert_filenames/rabbitmq.py b/producer/src/adapters/publish_filenames/rabbitmq.py similarity index 52% rename from producer/src/adapters/upsert_filenames/rabbitmq.py rename to producer/src/adapters/publish_filenames/rabbitmq.py index a75389c..18312e2 100644 --- a/producer/src/adapters/upsert_filenames/rabbitmq.py +++ b/producer/src/adapters/publish_filenames/rabbitmq.py @@ -1,71 +1,69 @@ from contextlib import contextmanager -from usecases import UpsertFilenamesClient +from usecases import PublishFilenamesClient import pika -from pika.channel import Channel -from pika.connection import Connection -from typing import Iterator, Optional, override, overload +from typing import Iterator, Optional, overload, Sequence, TYPE_CHECKING +from typing_extensions import override +from collections.abc import Callable import logging -class RabbitMQUpsertFilenamesClient(UpsertFilenamesClient): +if TYPE_CHECKING: + from pika.channel import Channel + from pika.connection import Connection + + +class RabbitMQPublishFilenamesClient(PublishFilenamesClient): def __init__( self, host: str, port: int, - username: str, - password: str, - queue: str = 'filenames', + credentials_service: Callable[[], tuple[str, str]], + queue: str = "filenames", ) -> None: - credentials = pika.PlainCredentials(username, password) - self._conn_parameters = pika.ConnectionParameters( - host=host, - port=port, - credentials=credentials, - ) + self._host = host + self._port = port + self._credentials_service = credentials_service self._queue = queue self._conn: Optional[Connection] = None - + @overload - def upsert(self, filename: str) -> bool: + def publish(self, filename: str) -> bool: ... - + @overload - def upsert(self, filename: list[str]) -> bool: + def publish(self, filename: Sequence[str]) -> list[bool]: ... 
- + @override - def upsert(self, filename: str | list[str]) -> bool | list[bool]: + def publish(self, filename: str | Sequence[str]) -> bool | list[bool]: if isinstance(filename, str): - return self._upsert_single(filename) - return self._upsert_batch(filename) + return self._publish_single(filename) + return self._publish_batch(filename) - @override - def upsert_stream(self, filename_iterator: Iterator[str]) -> dict[str, bool]: - successes_map: dict[str, bool] = {} - try: - for filename in filename_iterator: - success = self._upsert_single(filename) - successes_map[filename] = success - except Exception as e: - logging.exception(e) - return successes_map + def _reset_conn(self) -> None: + self._conn = None @contextmanager def _get_amqp_conn(self) -> Iterator[pika.BaseConnection]: if self._conn is None or self._conn.is_closed: - self._conn = pika.BlockingConnection(self._conn_parameters) + username, password = self._credentials_service() + credentials = pika.PlainCredentials(username, password) + conn_parameters = pika.ConnectionParameters( + host=self._host, + port=self._port, + credentials=credentials, + ) + self._conn = pika.BlockingConnection(conn_parameters) yield self._conn - - def _publish_one(self, channel: Channel, filename: str) -> None: + + def _amqp_publish(self, channel: Channel, filename: str) -> None: channel.basic_publish( - exchange='', + exchange="", routing_key=self._queue, body=filename, - properties=pika.BasicProperties( - delivery_mode=pika.DeliveryMode.Persistent - ), + properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), ) - def _upsert_single(self, filename: str) -> bool: + def _publish_single(self, filename: str) -> bool: try: with self._get_amqp_conn() as connection: channel = connection.channel() @@ -74,13 +72,14 @@ def _upsert_single(self, filename: str) -> bool: durable=True, ) channel.confirm_delivery() - self._publish_one(channel, filename) + self._amqp_publish(channel, filename) return True except Exception as e: logging.exception(e) + self._reset_conn() return False - - def _upsert_batch(self, filenames: list[str]) -> list[bool]: + + def _publish_batch(self, filenames: Sequence[str]) -> list[bool]: successes = [] try: with self._get_amqp_conn() as connection: @@ -91,13 +90,14 @@ def _upsert_batch(self, filenames: list[str]) -> list[bool]: ) for filename in filenames: try: - self._publish_one(channel, filename) + self._amqp_publish(channel, filename) successes.append(True) except Exception as e: logging.exception(e) successes.append(False) except Exception as e: logging.exception(e) + self._reset_conn() return [False] * len(filenames) return successes @@ -111,4 +111,3 @@ def close(self) -> bool: except Exception as e: logging.exception(e) return False - \ No newline at end of file diff --git a/producer/src/deployments/script/main.py b/producer/src/deployments/script/main.py index 16fcf1d..48f1edb 100644 --- a/producer/src/deployments/script/main.py +++ b/producer/src/deployments/script/main.py @@ -1,36 +1,45 @@ import pathlib from typing import Iterator -from adapters.upsert_filenames.rabbitmq import RabbitMQUpsertFilenamesClient +from adapters.publish_filenames.rabbitmq import RabbitMQPublishFilenamesClient from .config import RabbitMQConfig, ProjectConfig from .setup_logging import setup_logging import logging setup_logging() -upsert_filenames_client = RabbitMQUpsertFilenamesClient( +publish_filenames_client = RabbitMQPublishFilenamesClient( host=RabbitMQConfig.HOST, port=RabbitMQConfig.PORT, - username=RabbitMQConfig.USERNAME, - 
password=RabbitMQConfig.PASSWORD, + credentials_service=lambda: (RabbitMQConfig.USERNAME, RabbitMQConfig.PASSWORD), queue=RabbitMQConfig.QUEUE, ) + def traverse_files() -> Iterator[str]: - for filename in pathlib.Path(ProjectConfig.TARGET_FILE_DIR).glob(f'*{ProjectConfig.TARGET_FILE_EXTENSION}'): + for filename in pathlib.Path(ProjectConfig.TARGET_FILE_DIR).glob( + f"*{ProjectConfig.TARGET_FILE_EXTENSION}" + ): yield filename + def main() -> None: + successes_map = {} try: - successes_map = upsert_filenames_client.upsert_stream(traverse_files()) - failed_filenames = [filename for filename, success in successes_map.items() if not success] - if failed_filenames: - raise Exception(f'Failed to upsert filenames: {failed_filenames}') + for filename in traverse_files(): + successes_map[filename] = publish_filenames_client.publish(filename) + + failed_filenames = [ + filename for filename, success in successes_map.items() if not success + ] + if failed_filenames: + raise Exception(f"Failed to publish filenames: {failed_filenames}") + logging.info("Successfully published all filenames") except Exception as e: logging.exception(e) raise e finally: - upsert_filenames_client.close() + publish_filenames_client.close() + -if __name__ == '__main__': +if __name__ == "__main__": main() - \ No newline at end of file diff --git a/producer/src/deployments/script/setup_logging.py b/producer/src/deployments/script/setup_logging.py index 55fc1d2..161394c 100644 --- a/producer/src/deployments/script/setup_logging.py +++ b/producer/src/deployments/script/setup_logging.py @@ -3,51 +3,68 @@ from config import LoggingConfig import pathlib + def setup_logging() -> None: LOG_LEVEL_INT = getattr(logging, LoggingConfig.LOG_LEVEL.upper(), None) pathlib.Path(LoggingConfig.LOG_DIR).mkdir(parents=True, exist_ok=True) - handlers = [] + handlers: list[logging.Handler] = [] stream_handler = logging.StreamHandler() - stream_handler.setFormatter(logging.Formatter(LoggingConfig.LOG_FORMAT, datefmt = LoggingConfig.LOG_DATE_FORMAT)) + stream_handler.setFormatter( + logging.Formatter( + LoggingConfig.LOG_FORMAT, datefmt=LoggingConfig.LOG_DATE_FORMAT + ) + ) stream_handler.setLevel(LoggingConfig.LOG_LEVEL) handlers.append(stream_handler) if LOG_LEVEL_INT is not None and LOG_LEVEL_INT <= logging.INFO: info_handler = TimedRotatingFileHandler( - filename=f'{LoggingConfig.LOG_DIR}/info.log', + filename=f"{LoggingConfig.LOG_DIR}/info.log", when=LoggingConfig.LOG_ROTATION, interval=1, backupCount=LoggingConfig.LOG_RETENTION, ) - info_handler.setFormatter(logging.Formatter(LoggingConfig.LOG_FORMAT, datefmt = LoggingConfig.LOG_DATE_FORMAT)) + info_handler.setFormatter( + logging.Formatter( + LoggingConfig.LOG_FORMAT, datefmt=LoggingConfig.LOG_DATE_FORMAT + ) + ) info_handler.setLevel(logging.INFO) handlers.append(info_handler) if LOG_LEVEL_INT is not None and LOG_LEVEL_INT <= logging.WARNING: warning_handler = TimedRotatingFileHandler( - filename=f'{LoggingConfig.LOG_DIR}/warning.log', + filename=f"{LoggingConfig.LOG_DIR}/warning.log", when=LoggingConfig.LOG_ROTATION, interval=1, backupCount=LoggingConfig.LOG_RETENTION, ) - warning_handler.setFormatter(logging.Formatter(LoggingConfig.LOG_FORMAT, datefmt = LoggingConfig.LOG_DATE_FORMAT)) + warning_handler.setFormatter( + logging.Formatter( + LoggingConfig.LOG_FORMAT, datefmt=LoggingConfig.LOG_DATE_FORMAT + ) + ) warning_handler.setLevel(logging.WARNING) handlers.append(warning_handler) if LOG_LEVEL_INT is not None and LOG_LEVEL_INT <= logging.ERROR: error_handler = TimedRotatingFileHandler( - 
filename=f'{LoggingConfig.LOG_DIR}/error.log', + filename=f"{LoggingConfig.LOG_DIR}/error.log", when=LoggingConfig.LOG_ROTATION, interval=1, backupCount=LoggingConfig.LOG_RETENTION, ) - error_handler.setFormatter(logging.Formatter(LoggingConfig.LOG_FORMAT, datefmt = LoggingConfig.LOG_DATE_FORMAT)) + error_handler.setFormatter( + logging.Formatter( + LoggingConfig.LOG_FORMAT, datefmt=LoggingConfig.LOG_DATE_FORMAT + ) + ) error_handler.setLevel(logging.ERROR) handlers.append(error_handler) - + root_logger = logging.getLogger() root_logger.setLevel(LoggingConfig.LOG_LEVEL) - root_logger.handlers = handlers \ No newline at end of file + root_logger.handlers = handlers diff --git a/producer/src/usecases/__init__.py b/producer/src/usecases/__init__.py index 9596dae..8aa2f2b 100644 --- a/producer/src/usecases/__init__.py +++ b/producer/src/usecases/__init__.py @@ -1 +1 @@ -from .upsert_filenames import UpsertFilenamesClient \ No newline at end of file +from .publish_filenames import PublishFilenamesClient diff --git a/producer/src/usecases/publish_filenames.py b/producer/src/usecases/publish_filenames.py new file mode 100644 index 0000000..264e204 --- /dev/null +++ b/producer/src/usecases/publish_filenames.py @@ -0,0 +1,20 @@ +from abc import ABC, abstractmethod +from typing import overload, Sequence + + +class PublishFilenamesClient(ABC): + @overload + def publish(self, filename: str) -> bool: # type: ignore[overload-overlap] + ... + + @overload + def publish(self, filename: Sequence[str]) -> list[bool]: + ... + + @abstractmethod + def publish(self, filename: str | Sequence[str]) -> bool | list[bool]: + ... + + @abstractmethod + def close(self) -> bool: + ... diff --git a/producer/src/usecases/upsert_filenames.py b/producer/src/usecases/upsert_filenames.py deleted file mode 100644 index 107f9cf..0000000 --- a/producer/src/usecases/upsert_filenames.py +++ /dev/null @@ -1,25 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Iterator, overload - -class UpsertFilenamesClient(ABC): - - @overload - def upsert(self, filename: str) -> bool: - ... - - @overload - def upsert(self, filename: list[str]) -> list[bool]: - ... - - @abstractmethod - def upsert(self, filename: str | list[str]) -> bool | list[bool]: - ... - - @abstractmethod - def upsert_stream(self, filename_iterator: Iterator[str]) -> dict[str, bool]: - ... - - @abstractmethod - def close(self) -> bool: - ... - \ No newline at end of file diff --git a/producer/tests/test_adapters/__init__.py b/producer/tests/test_adapters/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/producer/tests/test_adapters/test_publish_filenames/__init__.py b/producer/tests/test_adapters/test_publish_filenames/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/producer/tests/test_adapters/test_publish_filenames/conftest.py b/producer/tests/test_adapters/test_publish_filenames/conftest.py new file mode 100644 index 0000000..91077fd --- /dev/null +++ b/producer/tests/test_adapters/test_publish_filenames/conftest.py @@ -0,0 +1,13 @@ +import string +import random +import pytest + + +@pytest.fixture +def random_filenames() -> list[str]: + return [ + "".join(random.choices(string.ascii_letters, k=10)) + + "." 
+ + "".join(random.choices(string.ascii_letters, k=3)) + for _ in range(10) + ] diff --git a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq.py b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq.py new file mode 100644 index 0000000..ba3053f --- /dev/null +++ b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq.py @@ -0,0 +1,85 @@ +from src.adapters.publish_filenames.rabbitmq import RabbitMQPublishFilenamesClient +import pika +import pytest + + +@pytest.fixture(scope="session") +def rabbitmq_config() -> dict: + return { + "host": "localhost", + "port": 5672, + "credentials_service": lambda: ("guest", "guest"), + "queue": "filenames", + } + + +@pytest.fixture(scope="session") +def rabbitmq_publish_filenames_client( + rabbitmq_config: dict, +) -> RabbitMQPublishFilenamesClient: + return RabbitMQPublishFilenamesClient(**rabbitmq_config) + + +@pytest.fixture(scope="session") +def raw_rabbitmq_pika_conn_config( + rabbitmq_config: dict, +) -> tuple[pika.BaseConnection, str]: + pika_conn = pika.BlockingConnection( + pika.ConnectionParameters( + host=rabbitmq_config["host"], + port=rabbitmq_config["port"], + credentials=pika.PlainCredentials( + *rabbitmq_config["credentials_service"]() + ), + ) + ) + return pika_conn, rabbitmq_config["queue"] + + +@pytest.fixture(scope="function") +def clean_rabbitmq_queue( + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], +) -> None: + pika_conn, queue = raw_rabbitmq_pika_conn_config + + channel = pika_conn.channel() + channel.queue_purge(queue=queue) + + +class TestSuccessfulPublish: + @pytest.mark.smoke + def test_publish_single( + self, + rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + random_filenames: list[str], + ): + for filename in random_filenames: + assert rabbitmq_publish_filenames_client.publish(filename) + + pika_conn, queue = raw_rabbitmq_pika_conn_config + + channel = pika_conn.channel() + for filename in random_filenames: + method_frame, _, body = channel.basic_get(queue=queue) + assert method_frame is not None + assert body.decode() == filename + channel.basic_ack(method_frame.delivery_tag) + + @pytest.mark.smoke + def test_publish_batch( + self, + rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + random_filenames: list[str], + ): + assert all(rabbitmq_publish_filenames_client.publish(random_filenames)) + + pika_conn, queue = raw_rabbitmq_pika_conn_config + + channel = pika_conn.channel() + for filename in random_filenames: + method_frame, _, body = channel.basic_get(queue=queue) + assert method_frame is not None + assert body.decode() == filename + channel.basic_ack(method_frame.delivery_tag) From 8a702ee1cc47a42027effcbf457cda91628a6ad0 Mon Sep 17 00:00:00 2001 From: alexau Date: Sat, 2 Dec 2023 16:58:54 +0800 Subject: [PATCH 04/36] Added test for the producer --- .github/workflows/test.yml | 30 ++- .gitignore | 3 + Makefile | 2 + consumer/pyproject.toml | 6 + .../src/adapters/fetch_filenames/rabbitmq.py | 11 +- .../adapters/file_parse_iot_records/csv.py | 3 +- .../adapters/upsert_iot_records/postgres.py | 7 +- consumer/src/deployments/scripts/main.py | 5 +- docker-compose.test.yml | 27 +++ producer/dockerfile | 4 +- producer/pyproject.toml | 6 + .../adapters/publish_filenames/rabbitmq.py | 49 +++-- producer/src/deployments/script/main.py | 18 +- .../src/deployments/script/setup_logging.py | 2 +- 
producer/src/usecases/publish_filenames.py | 8 +- .../test_publish_filenames/conftest.py | 13 -- .../test_publish_filenames/test_rabbitmq.py | 85 --------- .../test_rabbitmq/__init__.py | 0 .../test_rabbitmq/conftest.py | 63 +++++++ .../test_rabbitmq/test_close_conn_failed.py | 23 +++ .../test_close_conn_successful.py | 16 ++ .../test_rabbitmq/test_failed_conn.py | 177 ++++++++++++++++++ .../test_rabbitmq/test_failed_publish.py | 117 ++++++++++++ .../test_rabbitmq/test_successful_publish.py | 45 +++++ .../test_rabbitmq/utils.py | 11 ++ producer/tests/test_deployments/__init__.py | 0 .../test_deployments/test_main/__init__.py | 0 .../test_deployments/test_main/conftest.py | 81 ++++++++ .../test_main/test_main_function_failed.py | 49 +++++ .../test_main_function_successful.py | 43 +++++ .../test_main/test_traverse_files.py | 117 ++++++++++++ .../tests/test_deployments/test_main/utils.py | 8 + 32 files changed, 866 insertions(+), 163 deletions(-) create mode 100644 .gitignore create mode 100644 docker-compose.test.yml delete mode 100644 producer/tests/test_adapters/test_publish_filenames/test_rabbitmq.py create mode 100644 producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/__init__.py create mode 100644 producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/conftest.py create mode 100644 producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_close_conn_failed.py create mode 100644 producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_close_conn_successful.py create mode 100644 producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_conn.py create mode 100644 producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_publish.py create mode 100644 producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_successful_publish.py create mode 100644 producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/utils.py create mode 100644 producer/tests/test_deployments/__init__.py create mode 100644 producer/tests/test_deployments/test_main/__init__.py create mode 100644 producer/tests/test_deployments/test_main/conftest.py create mode 100644 producer/tests/test_deployments/test_main/test_main_function_failed.py create mode 100644 producer/tests/test_deployments/test_main/test_main_function_successful.py create mode 100644 producer/tests/test_deployments/test_main/test_traverse_files.py create mode 100644 producer/tests/test_deployments/test_main/utils.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6fe79d9..dfb5187 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -2,9 +2,6 @@ name: Producer Consumer CI Test on: push: branches: ["dev"] - paths: - - 'env/test/**' - - 'modules/**' workflow_dispatch: jobs: load-dotenv: @@ -42,6 +39,8 @@ jobs: test-producer: needs: load-dotenv runs-on: ubuntu-latest + env: + WORK_DIR: producer services: rabbitmq: image: rabbitmq:${{ needs.load-dotenv.outputs.rabbitmq-version-tag }} @@ -60,10 +59,33 @@ jobs: with: python-version: '3.11' cache: 'pip' - cache-dependency-path: requirements-dev.txt + cache-dependency-path: ${{ env.WORK_DIR }}/requirements-dev.txt - name: Install dependencies + working-directory: ${{ env.WORK_DIR }} run: | pip install -r requirements-dev.txt - name: Run tests + working-directory: ${{ env.WORK_DIR }} run: | coverage run -m pytest -v + coverage xml -o coverage.xml + env: + POSTGRES_HOST: localhost + POSTGRES_PORT: ${{ needs.load-dotenv.outputs.postgres-port }} + 
POSTGRES_USER: ${{ needs.load-dotenv.outputs.postgres-user }} + POSTGRES_PASSWORD: ${{ needs.load-dotenv.outputs.postgres-password }} + POSTGRES_DATABASE: ${{ needs.load-dotenv.outputs.postgres-database }} + RABBITMQ_HOST: localhost + RABBITMQ_PORT: ${{ needs.load-dotenv.outputs.rabbitmq-port }} + RABBITMQ_USER: ${{ needs.load-dotenv.outputs.rabbitmq-user }} + RABBITMQ_PASSWORD: ${{ needs.load-dotenv.outputs.rabbitmq-password }} + QUEUE_NAME: ${{ needs.load-dotenv.outputs.queue-name }} + - name: Upload coverage + uses: codecov/codecov-action@v2 + with: + file: ${{ env.WORK_DIR }}/coverage.xml + - name: Coveralls + uses: coverallsapp/github-action@master + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + path-to-lcov: ${{ env.WORK_DIR }}/coverage.xml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..366f53c --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.pytest_cache +__pycache__ +.mypy_cache diff --git a/Makefile b/Makefile index 4346ca3..8393a90 100644 --- a/Makefile +++ b/Makefile @@ -13,3 +13,5 @@ export_requirements: poetry export -f requirements.txt --output requirements.txt --without-hashes && \ cd ../consumer && \ poetry export -f requirements.txt --output requirements.txt --without-hashes +test_env: + docker compose -f docker-compose.test.yml up -d diff --git a/consumer/pyproject.toml b/consumer/pyproject.toml index 64d7d70..ba0a9b6 100644 --- a/consumer/pyproject.toml +++ b/consumer/pyproject.toml @@ -17,6 +17,12 @@ pytest = "^7.4.3" coverage = "^7.3.2" mypy = "^1.7.1" +[tool.pytest.ini_options] +markers = [ + "slow: marks tests as slow (deselect with '-m \"not slow\"')", + "smoke: quick smoke tests", +] + [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" diff --git a/consumer/src/adapters/fetch_filenames/rabbitmq.py b/consumer/src/adapters/fetch_filenames/rabbitmq.py index 3934a53..1b2b89e 100644 --- a/consumer/src/adapters/fetch_filenames/rabbitmq.py +++ b/consumer/src/adapters/fetch_filenames/rabbitmq.py @@ -1,18 +1,15 @@ from contextlib import contextmanager from usecases import FetchFilenameClient import pika -from typing import Iterator, Optional, TYPE_CHECKING, final +from pika.adapters.blocking_connection import BlockingChannel +from pika.spec import Basic, BasicProperties +from pika.connection import Connection +from typing import Iterator, Optional from typing_extensions import override from collections.abc import Callable import logging -if TYPE_CHECKING: - from pika.adapters.blocking_connection import BlockingChannel - from pika.spec import Basic, BasicProperties - from pika.connection import Connection - -@final class RabbitMQFetchFilenamesClient(FetchFilenameClient): def __init__( self, diff --git a/consumer/src/adapters/file_parse_iot_records/csv.py b/consumer/src/adapters/file_parse_iot_records/csv.py index 2b08191..cc562ec 100644 --- a/consumer/src/adapters/file_parse_iot_records/csv.py +++ b/consumer/src/adapters/file_parse_iot_records/csv.py @@ -1,7 +1,7 @@ from concurrent.futures import ThreadPoolExecutor from datetime import datetime from decimal import Decimal -from typing import Iterator, Optional, overload, Sequence, final, TYPE_CHECKING +from typing import Iterator, Optional, overload, Sequence from typing_extensions import override from entities import IOTRecord from usecases import FileParseIOTRecordsClient @@ -9,7 +9,6 @@ import logging -@final class CSVParseIOTRecordsClient(FileParseIOTRecordsClient): def __init__( self, diff --git a/consumer/src/adapters/upsert_iot_records/postgres.py 
b/consumer/src/adapters/upsert_iot_records/postgres.py index bfe28d9..1e19758 100644 --- a/consumer/src/adapters/upsert_iot_records/postgres.py +++ b/consumer/src/adapters/upsert_iot_records/postgres.py @@ -1,19 +1,16 @@ from contextlib import contextmanager import logging -from typing import Iterator, Optional, Sequence, overload, TYPE_CHECKING, final, TypeVar +from typing import Iterator, Optional, Sequence, overload, TypeVar from typing_extensions import override import psycopg2 +from psycopg2.extensions import connection from usecases import UpsertIOTRecordsClient from entities import IOTRecord from collections.abc import Callable -if TYPE_CHECKING: - from psycopg2.extensions import connection - T = TypeVar("T") -@final class PostgresUpsertIOTRecordsClient(UpsertIOTRecordsClient): def __init__( self, diff --git a/consumer/src/deployments/scripts/main.py b/consumer/src/deployments/scripts/main.py index 0b067f7..21c76ee 100644 --- a/consumer/src/deployments/scripts/main.py +++ b/consumer/src/deployments/scripts/main.py @@ -4,10 +4,7 @@ from config import RabbitMQConfig, PostgresConfig, CSVParserConfig from setup_logging import setup_logging import logging -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from entities import IOTRecord +from entities import IOTRecord setup_logging() diff --git a/docker-compose.test.yml b/docker-compose.test.yml new file mode 100644 index 0000000..53a4dbc --- /dev/null +++ b/docker-compose.test.yml @@ -0,0 +1,27 @@ +version: '3.8' +services: + records_postgres: + image: records_postgres:${POSTGRES_VERSION_TAG} + container_name: records_postgres + build: + context: database + dockerfile: dockerfile + args: + POSTGRES_VERSION_TAG: ${POSTGRES_VERSION_TAG} + environment: + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} + POSTGRES_USER: ${POSTGRES_USER} + POSTGRES_DB: ${POSTGRES_DATABASE} + ports: + - ${POSTGRES_PORT}:5432 + restart: always + records_rabbitmq: + image: rabbitmq:${RABBITMQ_VERSION_TAG} + container_name: records_rabbitmq + environment: + RABBITMQ_DEFAULT_USER: ${RABBITMQ_USER} + RABBITMQ_DEFAULT_PASS: ${RABBITMQ_PASSWORD} + ports: + - ${RABBITMQ_WEBAPP_PORT}:15672 + - ${RABBITMQ_PORT}:5672 + restart: always diff --git a/producer/dockerfile b/producer/dockerfile index b343433..2de51d5 100644 --- a/producer/dockerfile +++ b/producer/dockerfile @@ -32,5 +32,5 @@ WORKDIR ${HOME} COPY --from=build ${HOME}/.local ${HOME}/.local -COPY src/ . -CMD python3.11 -m deployments.script.main +COPY src . 
+CMD python3.11 -m src.deployments.script.main diff --git a/producer/pyproject.toml b/producer/pyproject.toml index 9e04a98..91d3883 100644 --- a/producer/pyproject.toml +++ b/producer/pyproject.toml @@ -15,6 +15,12 @@ pytest = "^7.4.3" coverage = "^7.3.2" mypy = "^1.7.1" +[tool.pytest.ini_options] +markers = [ + "slow: marks tests as slow (deselect with '-m \"not slow\"')", + "smoke: quick smoke tests", +] + [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" diff --git a/producer/src/adapters/publish_filenames/rabbitmq.py b/producer/src/adapters/publish_filenames/rabbitmq.py index 18312e2..a192e21 100644 --- a/producer/src/adapters/publish_filenames/rabbitmq.py +++ b/producer/src/adapters/publish_filenames/rabbitmq.py @@ -1,15 +1,13 @@ from contextlib import contextmanager -from usecases import PublishFilenamesClient +from ...usecases import PublishFilenamesClient import pika -from typing import Iterator, Optional, overload, Sequence, TYPE_CHECKING +from pika.channel import Channel +from pika.connection import Connection +from typing import Iterator, Optional, overload, Sequence from typing_extensions import override from collections.abc import Callable import logging -if TYPE_CHECKING: - from pika.channel import Channel - from pika.connection import Connection - class RabbitMQPublishFilenamesClient(PublishFilenamesClient): def __init__( @@ -27,11 +25,11 @@ def __init__( @overload def publish(self, filename: str) -> bool: - ... + pass @overload def publish(self, filename: Sequence[str]) -> list[bool]: - ... + pass @override def publish(self, filename: str | Sequence[str]) -> bool | list[bool]: @@ -81,24 +79,20 @@ def _publish_single(self, filename: str) -> bool: def _publish_batch(self, filenames: Sequence[str]) -> list[bool]: successes = [] - try: - with self._get_amqp_conn() as connection: - channel = connection.channel() - channel.queue_declare( - queue=self._queue, - durable=True, - ) - for filename in filenames: - try: - self._amqp_publish(channel, filename) - successes.append(True) - except Exception as e: - logging.exception(e) - successes.append(False) - except Exception as e: - logging.exception(e) - self._reset_conn() - return [False] * len(filenames) + for filename in filenames: + try: + with self._get_amqp_conn() as connection: + channel = connection.channel() + channel.queue_declare( + queue=self._queue, + durable=True, + ) + self._amqp_publish(channel, filename) + successes.append(True) + except Exception as e: + logging.exception(e) + self._reset_conn() + successes.append(False) return successes @override @@ -106,8 +100,7 @@ def close(self) -> bool: try: if self._conn is not None: self._conn.close() - return True - return False + return True except Exception as e: logging.exception(e) return False diff --git a/producer/src/deployments/script/main.py b/producer/src/deployments/script/main.py index 48f1edb..e66c393 100644 --- a/producer/src/deployments/script/main.py +++ b/producer/src/deployments/script/main.py @@ -1,28 +1,30 @@ import pathlib from typing import Iterator -from adapters.publish_filenames.rabbitmq import RabbitMQPublishFilenamesClient +from ...adapters.publish_filenames.rabbitmq import RabbitMQPublishFilenamesClient from .config import RabbitMQConfig, ProjectConfig from .setup_logging import setup_logging import logging setup_logging() -publish_filenames_client = RabbitMQPublishFilenamesClient( - host=RabbitMQConfig.HOST, - port=RabbitMQConfig.PORT, - credentials_service=lambda: (RabbitMQConfig.USERNAME, 
RabbitMQConfig.PASSWORD), - queue=RabbitMQConfig.QUEUE, -) +logging.getLogger("pika").setLevel(logging.WARNING) def traverse_files() -> Iterator[str]: for filename in pathlib.Path(ProjectConfig.TARGET_FILE_DIR).glob( f"*{ProjectConfig.TARGET_FILE_EXTENSION}" ): - yield filename + yield str(filename) def main() -> None: + publish_filenames_client = RabbitMQPublishFilenamesClient( + host=RabbitMQConfig.HOST, + port=RabbitMQConfig.PORT, + credentials_service=lambda: (RabbitMQConfig.USERNAME, RabbitMQConfig.PASSWORD), + queue=RabbitMQConfig.QUEUE, + ) + successes_map = {} try: for filename in traverse_files(): diff --git a/producer/src/deployments/script/setup_logging.py b/producer/src/deployments/script/setup_logging.py index 161394c..dcae074 100644 --- a/producer/src/deployments/script/setup_logging.py +++ b/producer/src/deployments/script/setup_logging.py @@ -1,6 +1,6 @@ import logging from logging.handlers import TimedRotatingFileHandler -from config import LoggingConfig +from .config import LoggingConfig import pathlib diff --git a/producer/src/usecases/publish_filenames.py b/producer/src/usecases/publish_filenames.py index 264e204..59dac13 100644 --- a/producer/src/usecases/publish_filenames.py +++ b/producer/src/usecases/publish_filenames.py @@ -5,16 +5,16 @@ class PublishFilenamesClient(ABC): @overload def publish(self, filename: str) -> bool: # type: ignore[overload-overlap] - ... + pass @overload def publish(self, filename: Sequence[str]) -> list[bool]: - ... + pass @abstractmethod def publish(self, filename: str | Sequence[str]) -> bool | list[bool]: - ... + pass @abstractmethod def close(self) -> bool: - ... + pass diff --git a/producer/tests/test_adapters/test_publish_filenames/conftest.py b/producer/tests/test_adapters/test_publish_filenames/conftest.py index 91077fd..e69de29 100644 --- a/producer/tests/test_adapters/test_publish_filenames/conftest.py +++ b/producer/tests/test_adapters/test_publish_filenames/conftest.py @@ -1,13 +0,0 @@ -import string -import random -import pytest - - -@pytest.fixture -def random_filenames() -> list[str]: - return [ - "".join(random.choices(string.ascii_letters, k=10)) - + "." 
- + "".join(random.choices(string.ascii_letters, k=3)) - for _ in range(10) - ] diff --git a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq.py b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq.py deleted file mode 100644 index ba3053f..0000000 --- a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq.py +++ /dev/null @@ -1,85 +0,0 @@ -from src.adapters.publish_filenames.rabbitmq import RabbitMQPublishFilenamesClient -import pika -import pytest - - -@pytest.fixture(scope="session") -def rabbitmq_config() -> dict: - return { - "host": "localhost", - "port": 5672, - "credentials_service": lambda: ("guest", "guest"), - "queue": "filenames", - } - - -@pytest.fixture(scope="session") -def rabbitmq_publish_filenames_client( - rabbitmq_config: dict, -) -> RabbitMQPublishFilenamesClient: - return RabbitMQPublishFilenamesClient(**rabbitmq_config) - - -@pytest.fixture(scope="session") -def raw_rabbitmq_pika_conn_config( - rabbitmq_config: dict, -) -> tuple[pika.BaseConnection, str]: - pika_conn = pika.BlockingConnection( - pika.ConnectionParameters( - host=rabbitmq_config["host"], - port=rabbitmq_config["port"], - credentials=pika.PlainCredentials( - *rabbitmq_config["credentials_service"]() - ), - ) - ) - return pika_conn, rabbitmq_config["queue"] - - -@pytest.fixture(scope="function") -def clean_rabbitmq_queue( - raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], -) -> None: - pika_conn, queue = raw_rabbitmq_pika_conn_config - - channel = pika_conn.channel() - channel.queue_purge(queue=queue) - - -class TestSuccessfulPublish: - @pytest.mark.smoke - def test_publish_single( - self, - rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, - raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], - random_filenames: list[str], - ): - for filename in random_filenames: - assert rabbitmq_publish_filenames_client.publish(filename) - - pika_conn, queue = raw_rabbitmq_pika_conn_config - - channel = pika_conn.channel() - for filename in random_filenames: - method_frame, _, body = channel.basic_get(queue=queue) - assert method_frame is not None - assert body.decode() == filename - channel.basic_ack(method_frame.delivery_tag) - - @pytest.mark.smoke - def test_publish_batch( - self, - rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, - raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], - random_filenames: list[str], - ): - assert all(rabbitmq_publish_filenames_client.publish(random_filenames)) - - pika_conn, queue = raw_rabbitmq_pika_conn_config - - channel = pika_conn.channel() - for filename in random_filenames: - method_frame, _, body = channel.basic_get(queue=queue) - assert method_frame is not None - assert body.decode() == filename - channel.basic_ack(method_frame.delivery_tag) diff --git a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/__init__.py b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/conftest.py b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/conftest.py new file mode 100644 index 0000000..778aa54 --- /dev/null +++ b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/conftest.py @@ -0,0 +1,63 @@ +from src.adapters.publish_filenames.rabbitmq import RabbitMQPublishFilenamesClient +import pika +import pytest +from pytest import MonkeyPatch + + +@pytest.fixture(scope="session") 
+def rabbitmq_config() -> dict: + return { + "host": "localhost", + "port": 5672, + "credentials_service": lambda: ("rabbitmq", "rabbitmq"), + "queue": "filenames", + } + + +@pytest.fixture(scope="function") +def rabbitmq_publish_filenames_client( + rabbitmq_config: dict, +) -> RabbitMQPublishFilenamesClient: + return RabbitMQPublishFilenamesClient(**rabbitmq_config) + + +@pytest.fixture(scope="function") +def raw_rabbitmq_pika_conn_config( + rabbitmq_config: dict, +) -> tuple[pika.BaseConnection, str]: + pika_conn = pika.BlockingConnection( + pika.ConnectionParameters( + host=rabbitmq_config["host"], + port=rabbitmq_config["port"], + credentials=pika.PlainCredentials( + *rabbitmq_config["credentials_service"]() + ), + ) + ) + return pika_conn, rabbitmq_config["queue"] + + +@pytest.fixture(scope="function", autouse=True) +def clean_rabbitmq_queue( + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], +) -> None: + pika_conn, queue = raw_rabbitmq_pika_conn_config + + channel = pika_conn.channel() + channel.queue_purge(queue=queue) + yield + channel.queue_purge(queue=queue) + + +@pytest.fixture(scope="function") +def patch_failed_publish(monkeypatch: MonkeyPatch) -> None: + def mocked_failed_basic_publish( + self, + *args, + **kwargs, + ) -> None: + raise Exception("Failed to publish") + + monkeypatch.setattr( + pika.channel.Channel, "basic_publish", mocked_failed_basic_publish + ) diff --git a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_close_conn_failed.py b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_close_conn_failed.py new file mode 100644 index 0000000..5a1ea56 --- /dev/null +++ b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_close_conn_failed.py @@ -0,0 +1,23 @@ +from pytest import MonkeyPatch +import pika +from src.adapters.publish_filenames.rabbitmq import RabbitMQPublishFilenamesClient +from .utils import random_filenames + + +def test_close_conn_failed( + rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, + monkeypatch: MonkeyPatch, +): + rabbitmq_publish_filenames_client.publish(random_filenames()[0]) + + assert rabbitmq_publish_filenames_client._conn is not None + + def mock_failed_close( + self, + *args, + **kwargs, + ) -> None: + raise Exception("Failed to close!") + + monkeypatch.setattr(pika.BlockingConnection, "close", mock_failed_close) + assert not rabbitmq_publish_filenames_client.close() diff --git a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_close_conn_successful.py b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_close_conn_successful.py new file mode 100644 index 0000000..7de1f42 --- /dev/null +++ b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_close_conn_successful.py @@ -0,0 +1,16 @@ +from src.adapters.publish_filenames.rabbitmq import RabbitMQPublishFilenamesClient +from .utils import random_filenames + + +def test_close_conn_successful( + rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, +): + rabbitmq_publish_filenames_client.publish(random_filenames()[0]) + assert rabbitmq_publish_filenames_client._conn is not None + assert rabbitmq_publish_filenames_client.close() + + +def test_none_conn_close_successful( + rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, +): + assert rabbitmq_publish_filenames_client.close() diff --git a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_conn.py 
b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_conn.py new file mode 100644 index 0000000..43cecaf --- /dev/null +++ b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_conn.py @@ -0,0 +1,177 @@ +import pytest +from .utils import random_filenames +from src.adapters.publish_filenames.rabbitmq import RabbitMQPublishFilenamesClient +import pika +from pytest import MonkeyPatch + + +@pytest.mark.smoke +@pytest.mark.parametrize("filename", random_filenames()) +def test_publish_single_failed_conn( + rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + filename: str, + monkeypatch: MonkeyPatch, +): + def mocked_failed_conn( + self, + *args, + **kwargs, + ) -> None: + raise Exception("Failed to connect") + + monkeypatch.setattr(pika.BlockingConnection, "__init__", mocked_failed_conn) + + with pytest.raises(Exception) as e: + assert not rabbitmq_publish_filenames_client.publish(filename) + assert e.value == "Failed to connect" + + pika_conn, queue = raw_rabbitmq_pika_conn_config + + channel = pika_conn.channel() + method_frame, _, body = channel.basic_get(queue=queue) + assert method_frame is None + assert body is None + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "random_filenames", + [random_filenames() for _ in range(5)], +) +def test_publish_batch_failed_conn( + rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + random_filenames: list[str], + monkeypatch: MonkeyPatch, +): + def mocked_failed_conn( + self, + *args, + **kwargs, + ) -> None: + raise Exception("Failed to connect") + + monkeypatch.setattr(pika.BlockingConnection, "__init__", mocked_failed_conn) + + with pytest.raises(Exception) as e: + assert not any(rabbitmq_publish_filenames_client.publish(random_filenames)) + assert e.value == "Failed to connect" + + pika_conn, queue = raw_rabbitmq_pika_conn_config + + channel = pika_conn.channel() + for _ in random_filenames: + method_frame, _, body = channel.basic_get(queue=queue) + assert method_frame is None + assert body is None + + +@pytest.mark.smoke +@pytest.mark.parametrize("filename", random_filenames()) +def test_publish_single_wrong_credentials( + rabbitmq_config: dict, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + filename: str, +): + copied_rabbitmq_config = rabbitmq_config.copy() + copied_rabbitmq_config["credentials_service"] = lambda: ("wrong", "wrong") + rabbitmq_publish_filenames_client = RabbitMQPublishFilenamesClient( + **copied_rabbitmq_config + ) + + with pytest.raises(Exception) as e: + assert not rabbitmq_publish_filenames_client.publish(filename) + assert "ACCESS_REFUSED" in e.value and "403" in e.value + + pika_conn, queue = raw_rabbitmq_pika_conn_config + channel = pika_conn.channel() + method_frame, _, body = channel.basic_get(queue=queue) + assert method_frame is None + assert body is None + + +@pytest.mark.slow +@pytest.mark.smoke +@pytest.mark.parametrize("filename", random_filenames()) +def test_publish_single_wrong_host( + rabbitmq_config: dict, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + filename: str, +): + copied_rabbitmq_config = rabbitmq_config.copy() + copied_rabbitmq_config["host"] = "wrong" + rabbitmq_publish_filenames_client = RabbitMQPublishFilenamesClient( + **copied_rabbitmq_config + ) + + with pytest.raises(Exception) as e: + assert not 
rabbitmq_publish_filenames_client.publish(filename) + assert "ACCESS_REFUSED" in e.value and "403" in e.value + + pika_conn, queue = raw_rabbitmq_pika_conn_config + channel = pika_conn.channel() + method_frame, _, body = channel.basic_get(queue=queue) + assert method_frame is None + assert body is None + + +@pytest.mark.slow +@pytest.mark.parametrize("filename", random_filenames()) +def test_publish_single_failed_conn_reset_conn( + rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, + filename: str, + monkeypatch: MonkeyPatch, +): + assert rabbitmq_publish_filenames_client.publish(filename) + conn = rabbitmq_publish_filenames_client._conn + + def mock_failed_basic_publish( + self, + *args, + **kwargs, + ) -> None: + raise Exception("Failed to publish!") + + monkeypatch.setattr( + pika.channel.Channel, "basic_publish", mock_failed_basic_publish + ) + + assert not rabbitmq_publish_filenames_client.publish(filename) + + monkeypatch.undo() + + assert rabbitmq_publish_filenames_client.publish(filename) + assert rabbitmq_publish_filenames_client._conn != conn + + +@pytest.mark.slow +@pytest.mark.parametrize( + "random_filenames", + [random_filenames() for _ in range(5)], +) +def test_publish_batch_failed_conn_reset_conn( + rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, + random_filenames: list[str], + monkeypatch: MonkeyPatch, +): + assert all(rabbitmq_publish_filenames_client.publish(random_filenames)) + conn = rabbitmq_publish_filenames_client._conn + + def mock_failed_basic_publish( + self, + *args, + **kwargs, + ) -> None: + raise Exception("Failed to publish!") + + monkeypatch.setattr( + pika.channel.Channel, "basic_publish", mock_failed_basic_publish + ) + + assert not any(rabbitmq_publish_filenames_client.publish(random_filenames)) + + monkeypatch.undo() + + assert rabbitmq_publish_filenames_client.publish(random_filenames) + assert rabbitmq_publish_filenames_client._conn != conn diff --git a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_publish.py b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_publish.py new file mode 100644 index 0000000..485292a --- /dev/null +++ b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_publish.py @@ -0,0 +1,117 @@ +import pytest +from .utils import random_filenames +from src.adapters.publish_filenames.rabbitmq import RabbitMQPublishFilenamesClient +import pika +import pytest +from pytest import MonkeyPatch + + +@pytest.mark.smoke +@pytest.mark.usefixtures("patch_failed_publish") +@pytest.mark.parametrize("filename", random_filenames()) +def test_publish_single_failed( + rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + filename: str, +): + with pytest.raises(Exception) as e: + assert not rabbitmq_publish_filenames_client.publish(filename) + assert e.value == "Failed to publish" + + pika_conn, queue = raw_rabbitmq_pika_conn_config + + channel = pika_conn.channel() + method_frame, _, body = channel.basic_get(queue=queue) + assert method_frame is None + assert body is None + + +@pytest.mark.smoke +@pytest.mark.usefixtures("patch_failed_publish") +@pytest.mark.parametrize( + "random_filenames", + [random_filenames() for _ in range(5)], +) +def test_publish_batch_failed( + rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + random_filenames: list[str], +): + with 
pytest.raises(Exception) as e: + assert not any(rabbitmq_publish_filenames_client.publish(random_filenames)) + assert e.value == "Failed to publish" + + pika_conn, queue = raw_rabbitmq_pika_conn_config + + channel = pika_conn.channel() + for _ in random_filenames: + method_frame, _, body = channel.basic_get(queue=queue) + assert method_frame is None + assert body is None + + +@pytest.mark.parametrize( + "random_filenames", + [random_filenames() for _ in range(5)], +) +def test_publish_batch_partial_failed( + rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + random_filenames: list[str], + monkeypatch: MonkeyPatch, +): + counter = 0 + + def mocked_partially_failed_basic_publish( + self, + *args, + **kwargs, + ) -> None: + nonlocal counter + counter += 1 + if counter == 3: + raise Exception("Failed to publish") + else: + with rabbitmq_publish_filenames_client._get_amqp_conn() as connection: + channel = connection.channel() + channel.queue_declare( + queue=rabbitmq_publish_filenames_client._queue, + durable=True, + ) + channel.confirm_delivery() + channel.basic_publish( + exchange="", + routing_key=rabbitmq_publish_filenames_client._queue, + body=args[0], + properties=pika.BasicProperties( + delivery_mode=pika.DeliveryMode.Persistent, + ), + ) + + monkeypatch.setattr( + rabbitmq_publish_filenames_client, + "_amqp_publish", + mocked_partially_failed_basic_publish, + ) + + with pytest.raises(Exception) as e: + publish_successes = rabbitmq_publish_filenames_client.publish(random_filenames) + + successes_filenames = [ + filename + for filename, success in zip(random_filenames, publish_successes) + if success + ] + assert not all(publish_successes) + assert any(publish_successes) + assert publish_successes[2] == False + assert e.value == "Failed to publish" + + pika_conn, queue = raw_rabbitmq_pika_conn_config + + channel = pika_conn.channel() + for filename in successes_filenames: + method_frame, _, body = channel.basic_get(queue=queue) + assert method_frame is not None + assert body.decode() == filename + channel.basic_ack(method_frame.delivery_tag) diff --git a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_successful_publish.py b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_successful_publish.py new file mode 100644 index 0000000..db8aa79 --- /dev/null +++ b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_successful_publish.py @@ -0,0 +1,45 @@ +import pytest +from .utils import random_filenames +from src.adapters.publish_filenames.rabbitmq import RabbitMQPublishFilenamesClient +import pika +import pytest + + +@pytest.mark.smoke +@pytest.mark.parametrize("filename", random_filenames()) +def test_publish_single_success( + rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + filename: str, +): + assert rabbitmq_publish_filenames_client.publish(filename) + + pika_conn, queue = raw_rabbitmq_pika_conn_config + + channel = pika_conn.channel() + method_frame, _, body = channel.basic_get(queue=queue) + assert method_frame is not None + assert body.decode() == filename + channel.basic_ack(method_frame.delivery_tag) + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "random_filenames", + [random_filenames() for _ in range(5)], +) +def test_publish_batch_success( + rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, + raw_rabbitmq_pika_conn_config: 
tuple[pika.BaseConnection, str], + random_filenames: list[str], +): + assert all(rabbitmq_publish_filenames_client.publish(random_filenames)) + + pika_conn, queue = raw_rabbitmq_pika_conn_config + + channel = pika_conn.channel() + for filename in random_filenames: + method_frame, _, body = channel.basic_get(queue=queue) + assert method_frame is not None + assert body.decode() == filename + channel.basic_ack(method_frame.delivery_tag) diff --git a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/utils.py b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/utils.py new file mode 100644 index 0000000..8d97d7d --- /dev/null +++ b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/utils.py @@ -0,0 +1,11 @@ +import random +import string + + +def random_filenames() -> list[str]: + return [ + "".join(random.choices(string.ascii_letters, k=10)) + + "." + + "".join(random.choices(string.ascii_letters, k=3)) + for _ in range(5) + ] diff --git a/producer/tests/test_deployments/__init__.py b/producer/tests/test_deployments/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/producer/tests/test_deployments/test_main/__init__.py b/producer/tests/test_deployments/test_main/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/producer/tests/test_deployments/test_main/conftest.py b/producer/tests/test_deployments/test_main/conftest.py new file mode 100644 index 0000000..1d0ac14 --- /dev/null +++ b/producer/tests/test_deployments/test_main/conftest.py @@ -0,0 +1,81 @@ +from typing import Type +from src.deployments.script.config import RabbitMQConfig, ProjectConfig +import pika +import pytest +from pytest import TempdirFactory +import pathlib +import os + + +@pytest.fixture(scope="session") +def mock_rabbitmq_config() -> Type[RabbitMQConfig]: + class MockedRabbitMQConfig(RabbitMQConfig): + HOST = "localhost" + PORT = 5672 + USERNAME = "rabbitmq" + PASSWORD = "rabbitmq" + QUEUE = "filenames" + + return MockedRabbitMQConfig + + +@pytest.fixture(scope="session") +def mock_project_config(tmpdir_factory: TempdirFactory) -> None: + class MockedProjectConfig(ProjectConfig): + TARGET_FILE_DIR = str(tmpdir_factory.mktemp("artifact")) + TARGET_FILE_EXTENSION = ".csv" + + return MockedProjectConfig + + +@pytest.fixture(scope="function") +def raw_rabbitmq_pika_conn_config( + mock_rabbitmq_config: Type[RabbitMQConfig], +) -> tuple[pika.BaseConnection, str]: + pika_conn = pika.BlockingConnection( + pika.ConnectionParameters( + host=mock_rabbitmq_config.HOST, + port=mock_rabbitmq_config.PORT, + credentials=pika.PlainCredentials( + mock_rabbitmq_config.USERNAME, mock_rabbitmq_config.PASSWORD + ), + ) + ) + return pika_conn, mock_rabbitmq_config.QUEUE + + +@pytest.fixture(scope="function", autouse=True) +def clean_rabbitmq_queue( + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], +) -> None: + pika_conn, queue = raw_rabbitmq_pika_conn_config + + channel = pika_conn.channel() + channel.queue_purge(queue=queue) + yield + channel.queue_purge(queue=queue) + + +@pytest.fixture(scope="function", autouse=True) +def clean_artifact_dir(mock_project_config: Type[ProjectConfig]) -> None: + def remove_files_in_dir(dir: pathlib.Path) -> None: + for path in dir.rglob("*"): + if path.is_file(): + path.unlink() + else: + remove_files_in_dir(path) + path.rmdir() + + for path in pathlib.Path(mock_project_config.TARGET_FILE_DIR).rglob("*"): + if path.is_file(): + path.unlink() + else: + remove_files_in_dir(path) + path.rmdir() + yield + for path in 
pathlib.Path(mock_project_config.TARGET_FILE_DIR).rglob("*"): + if path.is_file(): + path.unlink() + else: + remove_files_in_dir(path) + path.rmdir() diff --git a/producer/tests/test_deployments/test_main/test_main_function_failed.py b/producer/tests/test_deployments/test_main/test_main_function_failed.py new file mode 100644 index 0000000..573d1ef --- /dev/null +++ b/producer/tests/test_deployments/test_main/test_main_function_failed.py @@ -0,0 +1,49 @@ +from src.deployments.script.main import main +from src.deployments.script.config import ProjectConfig, RabbitMQConfig +from typing import Type +import pytest +from .utils import random_csv_filenames +import pathlib +from pytest import MonkeyPatch, LogCaptureFixture + + +@pytest.mark.parametrize( + "random_csv_filenames", + [random_csv_filenames() for _ in range(5)], +) +def test_main_flow_has_failed_files( + mock_rabbitmq_config: Type[RabbitMQConfig], + mock_project_config: Type[ProjectConfig], + random_csv_filenames: list[str], + monkeypatch: MonkeyPatch, + caplog: LogCaptureFixture, +): + for path in [ + pathlib.Path(mock_project_config.TARGET_FILE_DIR).joinpath(filename) + for filename in random_csv_filenames + ]: + path.touch() + + monkeypatch.setattr( + ProjectConfig, "TARGET_FILE_DIR", mock_project_config.TARGET_FILE_DIR + ) + monkeypatch.setattr( + ProjectConfig, + "TARGET_FILE_EXTENSION", + mock_project_config.TARGET_FILE_EXTENSION, + ) + monkeypatch.setattr(RabbitMQConfig, "HOST", mock_rabbitmq_config.HOST) + monkeypatch.setattr(RabbitMQConfig, "PORT", mock_rabbitmq_config.PORT) + monkeypatch.setattr(RabbitMQConfig, "USERNAME", mock_rabbitmq_config.USERNAME) + monkeypatch.setattr(RabbitMQConfig, "PASSWORD", mock_rabbitmq_config.PASSWORD) + monkeypatch.setattr(RabbitMQConfig, "QUEUE", mock_rabbitmq_config.QUEUE) + + monkeypatch.setattr( + "src.adapters.publish_filenames.rabbitmq.RabbitMQPublishFilenamesClient.publish", + lambda self, filename: False, + ) + caplog.at_level("CRITICAL") + with pytest.raises(Exception) as e: + main() + assert "Failed to publish filenames" in str(e.value) + assert "Failed to publish filenames" in caplog.text diff --git a/producer/tests/test_deployments/test_main/test_main_function_successful.py b/producer/tests/test_deployments/test_main/test_main_function_successful.py new file mode 100644 index 0000000..fe8f72a --- /dev/null +++ b/producer/tests/test_deployments/test_main/test_main_function_successful.py @@ -0,0 +1,43 @@ +from src.deployments.script.main import main +from src.deployments.script.config import ProjectConfig, RabbitMQConfig +from typing import Type +import pytest +from .utils import random_csv_filenames +import pathlib +from pytest import MonkeyPatch, LogCaptureFixture + + +@pytest.mark.parametrize( + "random_csv_filenames", + [random_csv_filenames() for _ in range(5)], +) +def test_main_flow_no_failed_files( + mock_rabbitmq_config: Type[RabbitMQConfig], + mock_project_config: Type[ProjectConfig], + random_csv_filenames: list[str], + monkeypatch: MonkeyPatch, + caplog: LogCaptureFixture, +): + for path in [ + pathlib.Path(mock_project_config.TARGET_FILE_DIR).joinpath(filename) + for filename in random_csv_filenames + ]: + path.touch() + + monkeypatch.setattr( + ProjectConfig, "TARGET_FILE_DIR", mock_project_config.TARGET_FILE_DIR + ) + monkeypatch.setattr( + ProjectConfig, + "TARGET_FILE_EXTENSION", + mock_project_config.TARGET_FILE_EXTENSION, + ) + monkeypatch.setattr(RabbitMQConfig, "HOST", mock_rabbitmq_config.HOST) + monkeypatch.setattr(RabbitMQConfig, "PORT", 
mock_rabbitmq_config.PORT) + monkeypatch.setattr(RabbitMQConfig, "USERNAME", mock_rabbitmq_config.USERNAME) + monkeypatch.setattr(RabbitMQConfig, "PASSWORD", mock_rabbitmq_config.PASSWORD) + monkeypatch.setattr(RabbitMQConfig, "QUEUE", mock_rabbitmq_config.QUEUE) + + with caplog.at_level("INFO"): + assert main() is None + assert "Successfully published all filenames" in caplog.text diff --git a/producer/tests/test_deployments/test_main/test_traverse_files.py b/producer/tests/test_deployments/test_main/test_traverse_files.py new file mode 100644 index 0000000..a822eab --- /dev/null +++ b/producer/tests/test_deployments/test_main/test_traverse_files.py @@ -0,0 +1,117 @@ +from src.deployments.script.main import traverse_files +from src.deployments.script.config import ProjectConfig +from typing import Type +import pytest +from .utils import random_csv_filenames +import pathlib +from pytest import MonkeyPatch + + +@pytest.mark.parametrize( + "random_csv_filenames", + [random_csv_filenames() for _ in range(5)], +) +def test_traverse_files_show_all_files( + mock_project_config: Type[ProjectConfig], + random_csv_filenames: list[str], + monkeypatch: MonkeyPatch, +): + for path in [ + pathlib.Path(mock_project_config.TARGET_FILE_DIR).joinpath(filename) + for filename in random_csv_filenames + ]: + path.touch() + + monkeypatch.setattr( + ProjectConfig, "TARGET_FILE_DIR", mock_project_config.TARGET_FILE_DIR + ) + monkeypatch.setattr( + ProjectConfig, + "TARGET_FILE_EXTENSION", + mock_project_config.TARGET_FILE_EXTENSION, + ) + assert set(traverse_files()) == set( + [ + str(pathlib.Path(mock_project_config.TARGET_FILE_DIR).joinpath(filename)) + for filename in random_csv_filenames + ] + ) + + +@pytest.mark.parametrize( + "random_csv_filenames", + [random_csv_filenames() for _ in range(5)], +) +def test_traverse_files_show_top_level_files_only( + mock_project_config: Type[ProjectConfig], + random_csv_filenames: list[str], + monkeypatch: MonkeyPatch, +): + temp_dir = pathlib.Path(mock_project_config.TARGET_FILE_DIR) / "temp" + for i, path in enumerate( + [ + pathlib.Path(mock_project_config.TARGET_FILE_DIR).joinpath(filename) + for filename in random_csv_filenames + ] + ): + if i != 4: + path.touch() + else: + temp_dir.mkdir() + (temp_dir / path.name).touch() + + monkeypatch.setattr( + ProjectConfig, "TARGET_FILE_DIR", mock_project_config.TARGET_FILE_DIR + ) + monkeypatch.setattr( + ProjectConfig, + "TARGET_FILE_EXTENSION", + mock_project_config.TARGET_FILE_EXTENSION, + ) + + assert set(traverse_files()) == set( + [ + str(pathlib.Path(mock_project_config.TARGET_FILE_DIR).joinpath(filename)) + for i, filename in enumerate(random_csv_filenames) + if i != 4 + ] + ) + + +@pytest.mark.xfail(reason="Subdirectories are not supported", strict=True) +@pytest.mark.parametrize( + "random_csv_filenames", + [random_csv_filenames() for _ in range(5)], +) +def test_traverse_files_show_all_recursive_files( + mock_project_config: Type[ProjectConfig], + random_csv_filenames: list[str], + monkeypatch: MonkeyPatch, +): + all_path = [] + temp_dir = pathlib.Path(mock_project_config.TARGET_FILE_DIR) / "temp" + for i, path in enumerate( + [ + pathlib.Path(mock_project_config.TARGET_FILE_DIR).joinpath(filename) + for filename in random_csv_filenames + ] + ): + if i != 4: + path.touch() + all_path.append(path) + else: + temp_dir.mkdir() + new_path = temp_dir / path.name + new_path.touch() + all_path.append(new_path) + + monkeypatch.setattr( + ProjectConfig, "TARGET_FILE_DIR", mock_project_config.TARGET_FILE_DIR + ) + 
monkeypatch.setattr( + ProjectConfig, + "TARGET_FILE_EXTENSION", + mock_project_config.TARGET_FILE_EXTENSION, + ) + + assert set(traverse_files()) == set([str(path) for path in all_path]) diff --git a/producer/tests/test_deployments/test_main/utils.py b/producer/tests/test_deployments/test_main/utils.py new file mode 100644 index 0000000..8cc804a --- /dev/null +++ b/producer/tests/test_deployments/test_main/utils.py @@ -0,0 +1,8 @@ +import random +import string + + +def random_csv_filenames() -> list[str]: + return [ + "".join(random.choices(string.ascii_letters, k=10)) + ".csv" for _ in range(5) + ] From d0757621de5ac28acc1fe170d2813850ca7c0ae4 Mon Sep 17 00:00:00 2001 From: alexau Date: Sat, 2 Dec 2023 17:00:57 +0800 Subject: [PATCH 05/36] Debugging the outputs for the step load-dotenv --- .github/workflows/test.yml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index dfb5187..baa89c4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,16 +7,16 @@ jobs: load-dotenv: runs-on: ubuntu-latest outputs: - postgres-version-tag: ${{ steps.load-dotenv.outputs.postgres-version-tag }} - postgres-port: ${{ steps.load-dotenv.outputs.postgres-port }} - postgres-user: ${{ steps.load-dotenv.outputs.postgres-user }} - postgres-password: ${{ steps.load-dotenv.outputs.postgres-password }} - postgres-database: ${{ steps.load-dotenv.outputs.postgres-database }} - rabbitmq-version-tag: ${{ steps.load-dotenv.outputs.rabbitmq-version-tag }} - rabbitmq-port: ${{ steps.load-dotenv.outputs.rabbitmq-port }} - rabbitmq-user: ${{ steps.load-dotenv.outputs.rabbitmq-user }} - rabbitmq-password: ${{ steps.load-dotenv.outputs.rabbitmq-password }} - queue-name: ${{ steps.load-dotenv.outputs.queue-name }} + postgres-version-tag: ${{ steps.load-dotenv.outputs.POSTGRES_VERSION_TAG }} + postgres-port: ${{ steps.load-dotenv.outputs.POSTGRES_PORT }} + postgres-user: ${{ steps.load-dotenv.outputs.POSTGRES_USER }} + postgres-password: ${{ steps.load-dotenv.outputs.POSTGRES_PASSWORD }} + postgres-database: ${{ steps.load-dotenv.outputs.POSTGRES_DATABASE }} + rabbitmq-version-tag: ${{ steps.load-dotenv.outputs.RABBITMQ_VERSION_TAG }} + rabbitmq-port: ${{ steps.load-dotenv.outputs.RABBITMQ_PORT }} + rabbitmq-user: ${{ steps.load-dotenv.outputs.RABBITMQ_USER }} + rabbitmq-password: ${{ steps.load-dotenv.outputs.RABBITMQ_PASSWORD }} + queue-name: ${{ steps.load-dotenv.outputs.QUEUE_NAME }} steps: - name: Checkout uses: actions/checkout@v4 From 6a1fc3b65779216fb07efc17450750cb69f89b94 Mon Sep 17 00:00:00 2001 From: alexau Date: Sat, 2 Dec 2023 17:03:41 +0800 Subject: [PATCH 06/36] Debugging the ports for docker network in github actions --- .github/workflows/test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index baa89c4..b84a4a6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -52,6 +52,8 @@ jobs: --health-interval 5s --health-timeout 30s --health-retries 3 + ports: + - ${{ needs.load-dotenv.outputs.rabbitmq-port }}:5672 steps: - name: Checkout uses: actions/checkout@v4 From 4b10b63b8be4aa404e43305f498480695f1b9253 Mon Sep 17 00:00:00 2001 From: alexau Date: Sat, 2 Dec 2023 17:08:52 +0800 Subject: [PATCH 07/36] Debugging the error of queue not found in rabbitmq --- .../test_publish_filenames/test_rabbitmq/conftest.py | 3 ++- producer/tests/test_deployments/test_main/conftest.py | 3 ++- 2 files changed, 4 insertions(+), 
2 deletions(-) diff --git a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/conftest.py b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/conftest.py index 778aa54..165a300 100644 --- a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/conftest.py +++ b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/conftest.py @@ -38,12 +38,13 @@ def raw_rabbitmq_pika_conn_config( @pytest.fixture(scope="function", autouse=True) -def clean_rabbitmq_queue( +def setup_teardown_rabbitmq_queue( raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], ) -> None: pika_conn, queue = raw_rabbitmq_pika_conn_config channel = pika_conn.channel() + channel.queue_declare(queue=queue, durable=True) channel.queue_purge(queue=queue) yield channel.queue_purge(queue=queue) diff --git a/producer/tests/test_deployments/test_main/conftest.py b/producer/tests/test_deployments/test_main/conftest.py index 1d0ac14..280f24c 100644 --- a/producer/tests/test_deployments/test_main/conftest.py +++ b/producer/tests/test_deployments/test_main/conftest.py @@ -45,12 +45,13 @@ def raw_rabbitmq_pika_conn_config( @pytest.fixture(scope="function", autouse=True) -def clean_rabbitmq_queue( +def setup_teardown_rabbitmq_queue( raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], ) -> None: pika_conn, queue = raw_rabbitmq_pika_conn_config channel = pika_conn.channel() + channel.queue_declare(queue=queue, durable=True) channel.queue_purge(queue=queue) yield channel.queue_purge(queue=queue) From 1cba6c4663c8b605f624fcbc480ef4aa6aa62a66 Mon Sep 17 00:00:00 2001 From: alexau Date: Sat, 2 Dec 2023 17:20:31 +0800 Subject: [PATCH 08/36] Updated the code coverage testing --- .github/workflows/test.yml | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b84a4a6..7f7809f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -70,7 +70,7 @@ jobs: working-directory: ${{ env.WORK_DIR }} run: | coverage run -m pytest -v - coverage xml -o coverage.xml + coverage report -m env: POSTGRES_HOST: localhost POSTGRES_PORT: ${{ needs.load-dotenv.outputs.postgres-port }} @@ -82,12 +82,10 @@ jobs: RABBITMQ_USER: ${{ needs.load-dotenv.outputs.rabbitmq-user }} RABBITMQ_PASSWORD: ${{ needs.load-dotenv.outputs.rabbitmq-password }} QUEUE_NAME: ${{ needs.load-dotenv.outputs.queue-name }} - - name: Upload coverage - uses: codecov/codecov-action@v2 + - name: upload artifact + uses: actions/upload-pages-artifact@v1 with: - file: ${{ env.WORK_DIR }}/coverage.xml - - name: Coveralls - uses: coverallsapp/github-action@master - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - path-to-lcov: ${{ env.WORK_DIR }}/coverage.xml + path: ${{ env.WORK_DIR }}/htmlcov + - name: deploy to Github Pages + uses: actions/deploy-pages@v2 + id: deployment From f8cf512a6f5306902ea90fd25d95a045ae62f57c Mon Sep 17 00:00:00 2001 From: alexau Date: Sat, 2 Dec 2023 17:24:29 +0800 Subject: [PATCH 09/36] Updated the test --- .github/workflows/test.yml | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7f7809f..0539c8d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -36,11 +36,9 @@ jobs: echo "RABBITMQ_USER=$RABBITMQ_USER" >> $GITHUB_OUTPUT echo "RABBITMQ_PASSWORD=$RABBITMQ_PASSWORD" >> $GITHUB_OUTPUT echo "QUEUE_NAME=$QUEUE_NAME" >> $GITHUB_OUTPUT - test-producer: + 
test: needs: load-dotenv runs-on: ubuntu-latest - env: - WORK_DIR: producer services: rabbitmq: image: rabbitmq:${{ needs.load-dotenv.outputs.rabbitmq-version-tag }} @@ -61,15 +59,17 @@ jobs: with: python-version: '3.11' cache: 'pip' - cache-dependency-path: ${{ env.WORK_DIR }}/requirements-dev.txt + cache-dependency-path: | + producer/requirements-dev.txt + consumer/requirements-dev.txt - name: Install dependencies - working-directory: ${{ env.WORK_DIR }} run: | - pip install -r requirements-dev.txt + pip install -r producer/requirements-dev.txt + pip install -r consumer/requirements-dev.txt - name: Run tests - working-directory: ${{ env.WORK_DIR }} run: | - coverage run -m pytest -v + coverage run -m pytest -v producer/tests consumer/tests + coverage html coverage report -m env: POSTGRES_HOST: localhost @@ -85,7 +85,27 @@ jobs: - name: upload artifact uses: actions/upload-pages-artifact@v1 with: - path: ${{ env.WORK_DIR }}/htmlcov + path: ./htmlcov/ - name: deploy to Github Pages uses: actions/deploy-pages@v2 id: deployment + - name: Coverage Badge + uses: tj-actions/coverage-badge-py@v2 + - name: Verify Changed files + uses: tj-actions/verify-changed-files@v16 + id: verify-changed-files + with: + files: coverage.svg + - name: Commit files + if: steps.verify-changed-files.outputs.files_changed == 'true' + run: | + git config --local user.email "github-actions[bot]@users.noreply.github.com" + git config --local user.name "github-actions[bot]" + git add coverage.svg + git commit -m "Updated coverage.svg" + - name: Push changes + if: steps.verify-changed-files.outputs.files_changed == 'true' + uses: ad-m/github-push-action@master + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + branch: ${{ github.ref }} From 69c5d73dd11adf8076486084178c98182f003d17 Mon Sep 17 00:00:00 2001 From: alexau Date: Sat, 2 Dec 2023 17:30:18 +0800 Subject: [PATCH 10/36] Updated the id-token permission --- .github/workflows/test.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0539c8d..8b91171 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -39,6 +39,9 @@ jobs: test: needs: load-dotenv runs-on: ubuntu-latest + permissions: + pages: write + id-token: write services: rabbitmq: image: rabbitmq:${{ needs.load-dotenv.outputs.rabbitmq-version-tag }} From 6b02030d13679bd1aadcd7fbd97d0b932551e467 Mon Sep 17 00:00:00 2001 From: alexau Date: Sat, 2 Dec 2023 18:04:18 +0800 Subject: [PATCH 11/36] Updated the permissions --- .github/workflows/test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8b91171..9d0242e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -41,6 +41,7 @@ jobs: runs-on: ubuntu-latest permissions: pages: write + contents: write id-token: write services: rabbitmq: From 2ec6ac235401d9710c7af5dab69e0b74ab378ac5 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 2 Dec 2023 10:05:36 +0000 Subject: [PATCH 12/36] Updated coverage.svg --- coverage.svg | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 coverage.svg diff --git a/coverage.svg b/coverage.svg new file mode 100644 index 0000000..0fa9649 --- /dev/null +++ b/coverage.svg @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + coverage + coverage + 98% + 98% + + From 04608101a7e2503ae2c98acc22ac1fa19bb6c02f Mon Sep 17 00:00:00 2001 From: alexau Date: Sat, 2 Dec 2023 18:52:27 +0800 Subject: [PATCH 13/36] Updated the CICD test pipeline 
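The producer and consumer test jobs now each write their own coverage data file (.coverage_producer and .coverage_consumer, selected through the COVERAGE_FILE environment variable) and cache it under a key derived from a hash of that component's watched files; a separate coverage job then restores both caches, merges them, and publishes the report. The workflow drives the coverage CLI directly (coverage combine / coverage report / coverage html). Purely as an illustration, a minimal local sketch of the same merge using coverage.py's Python API (the API calls below are this note's assumption, not something the pipeline executes) could look like:

    from coverage import Coverage

    # Merge the per-component data files into one .coverage database.
    cov = Coverage(data_file=".coverage")
    cov.combine([".coverage_producer", ".coverage_consumer"])
    cov.save()

    # Terminal summary plus HTML output; omitting test modules is optional.
    cov.report(show_missing=True, omit=["*/tests/*"])
    cov.html_report(directory="htmlcov", omit=["*/tests/*"])

The point of the split is that an unchanged component hits its coverage cache and skips its test run, while the combined report still spans both packages.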
--- .coverage | Bin 0 -> 69632 bytes .env | 4 +- .github/workflows/test.yml | 148 ++++++++++++++++-- Makefile | 31 +++- README.md | 5 +- __init__.py | 0 consumer/reuqirements-dev.txt | 2 - .../src/adapters/fetch_filenames/rabbitmq.py | 2 +- .../adapters/file_parse_iot_records/csv.py | 4 +- .../adapters/upsert_iot_records/postgres.py | 4 +- consumer/src/deployments/scripts/main.py | 10 +- .../src/usecases/file_parse_iot_records.py | 2 +- consumer/src/usecases/upsert_iot_records.py | 2 +- consumer/tests/test_adapters/__init__.py | 0 .../test_fetch_filenames/__init__.py | 0 .../test_rabbitmq/__init__.py | 0 .../test_rabbitmq/conftest.py | 0 .../test_rabbitmq/test_helloworld.py | 2 + .../test_file_parse_iot_records/__init__.py | 0 .../test_csv/__init__.py | 0 .../test_csv/conftest.py | 0 .../test_upsert_iot_records/__init__.py | 0 .../test_postgres/__init__.py | 0 .../test_postgres/conftest.py | 0 .../test_postgres/test_failed_conn.py | 0 .../test_postgres/test_failed_upsert.py | 0 .../test_postgres/test_successful_upsert.py | 1 + docker-compose.test.yml | 4 +- docker-compose.yml | 4 +- .../test_rabbitmq/conftest.py | 34 ++-- .../test_rabbitmq/test_failed_conn.py | 17 +- .../test_deployments/test_main/conftest.py | 27 +--- .../test_main/test_main_function_failed.py | 8 +- .../test_main_function_successful.py | 8 +- 34 files changed, 220 insertions(+), 99 deletions(-) create mode 100644 .coverage create mode 100644 __init__.py delete mode 100644 consumer/reuqirements-dev.txt create mode 100644 consumer/tests/test_adapters/__init__.py create mode 100644 consumer/tests/test_adapters/test_fetch_filenames/__init__.py create mode 100644 consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/__init__.py create mode 100644 consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/conftest.py create mode 100644 consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_helloworld.py create mode 100644 consumer/tests/test_adapters/test_file_parse_iot_records/__init__.py create mode 100644 consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/__init__.py create mode 100644 consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/conftest.py create mode 100644 consumer/tests/test_adapters/test_upsert_iot_records/__init__.py create mode 100644 consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/__init__.py create mode 100644 consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/conftest.py create mode 100644 consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_conn.py create mode 100644 consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_upsert.py create mode 100644 consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_successful_upsert.py diff --git a/.coverage b/.coverage new file mode 100644 index 0000000000000000000000000000000000000000..c091ac8000814f81f92d84317a50a12355faf859 GIT binary patch literal 69632 zcmeI53v3+MbwGD^XJ`NO_|=NyZzzf)DSlQIMbS=byRt2)svSg@19} zYG+mw9Z|~AL7fV5ouozE6ser1hETw0kTz`^CyiwU4ca2E(zI~w08ZQ{u@nS$mDs4` zMh@M3-+nkNE-CNwqsX}n^4^`_z4x8>nfKnz&fI<1oe5J@#`R1}HI-pUjYAL|zpW?^ zhr_(FDJ`RnCz6_yNRK7rs+mYnDCVSA*gI=zV|b#2b#SV~9S8aG_$`_k zgQHAs0!&P05-Bw^tvswv_wya{-c0SdnOgxyw8TUj_fUHBhxaNOZCuM}>9}U(qUcGC z_4<1sJ>~a=>+6NbG@d3k6NkS*Noruld{Xv|gQ2*d8N=vf88scB)Qo;bEv7K8L!8IW zTxv6VDq`wNGLh!VOdAO^p{Et?xE9Zv+F02H_9L{y2jJOL1GXN*1U-~HGVjYqXa3+a zN9K3WA3P4hrS$^V`2*249L!@*XHzjiemI+&!pwmqa*j=Xu_G5d*3|Qfeb0GQCNJmH zTaujx*arGia2e&U?Zm^b=780yQ$)uu> 
z^D2W!E4j>p(rm9Ykpc`ggB4BD^E*PM*R-@@un>b1OX{)Gvc^k{p+N_pnGg{b`(o`dxDKsQYHIjuR=3B`^9m#Kj>b+Mtj0Buk@w8=pps-fyo>UENg^X?`rt3*fP3O)HaKs)1 zd?pNq$FT6Mbj!RAV;XHt9PQ>Y6;3YL%sf|boz##}oSHQ=P4QVU_{ z7o8=AkZB;6iW5UQUpm4F@jfJYeIN%fVQVLSe6bU&r4Yf+z)c*#Up_Ap%5z z2oM1xKm>>Y5g-CYfCvx)B0vP*e*|2DQ}E#U-^u>l!7hQI4-p^&M1Tko0U|&IhyW2F z0z`la5CI}^eG&*dU9GHei^o>Y5g-CYfCvx)B2XX@bhQeFSpcWs)$Gep0=QuZy#F6Qwe%Z+0xi-+fCvx) zB0vO)01+SpM1Tko0U|&I-cJN-x}1<4fqeoK#d7NXls-xe5g-CYfCvx)B0vO)01+Sp zM1TkofnQ4kvMySx5Si3dT0~81$0N7v@oY*>Y z5g-CYfCvx)BJe&W;Btk0c>RAg^bH4loBa%)0r+3+CHC*^U)Xc(E9{Hxuh{30b@^5K`|=C&bMjZ@FUV)*Gx8tFpO8N$ zoAO~>Y5g-CYpke|IqoUy3 z(l+<<*WSAIe`daX)6O4Ue&!##{X1aoy4JaukG+1QGy>n{mJ9NB&~43UUii{0-@EkM zv7>=uSYE&3JpGc~2jAN2PDbZGb?MF9ekMs_F8|p!C8Jhdn$>*N`q)_9S{9GqoYM1J8 z>sCRkMI8~O8q|Y=#83}7C5Xc(_X|=8b)O&wQEv%K0o1*My-ASVs5=EoM7>dvT&Oz)$%$GKBms51fY<*=Wy`_tWV>KwKgzbUUbc}n!`ME= zB>7$WZTaW&PvsxUFT?o$y!_8F#(zfsTls1EDf!RkKbD`6KQ3GBm+VdU8he%ffV~K_ z2mi{x!M?`+j{P+|%btY!gHN&3>```tWm%d{u!q=vY+uENoDLuYM1Tko0U|&IhyW2F z0z`la5P|ER0FGCMwo$HHcW~7*!d3Hjt~LyF)wGSP^+Q~Rw{o>E!d2rSR}BMP)%SB% z*T+@u7OrY~xnezB$=zIqHggs1;wrF-D}N_fzKvXYJGk;FTuJR*x!brBTe)(zaOG@v z!*qeLfiH|UakXPTS0iDrwy)!AxRI-E4O|V?bG5aOt4J+ZgEd?YFs}M#uKGe;!CU|$ z>kV+#iHu1>-q0`NmHi}$zxVTcBI6>fR$Ls%&n(Ms~QT#-J2oM1x zKm>>Y5g-CYfCvx)B0vO)z)}S8`ag~Tmm);lhyW2F0z`la5CI}U1c(3;AOb{y2weXJ z`0xMa*B$IKdyD-X?&815e#pKLPXYK(b{_8J|0i~ieU<$^JPY7yxSRhk*k{?Fuurqc z;fVl`vXg9@nM`MgnFe?D-_P!0cfqd+cC%aAP3#6X%m(3?1e;hp+rS!G4GS=d3Gl3d zU&wEOJNgg-B0vO)01+SpM1Tko0U|&IhyW2-Jp!&#c;dZl2Z|9C+ffXo*oI;V#a0v% z6oV)RQ1qkdL$L)#FNz)%-6%Gr=t8jxMJI}lC^}FmDB4l9p=d?Xf}$D41{6&w)}sic zScjq!MFWa@6m=+SQPiMdC}b2N6hRaL6n+#w6kZe_6cP$I3K4}1g%bsT{=e%n$7aWA zhwF^=hV;19Bkv2%xxWif^?NGxdDm|_f8@*x|0m3Ne=K+%r^S0+7kw9flimkB|KYhk zbVKk$a5mT$Xz@SiA9s(6-wiw;IKsZn4po{;EfOD`YgI#$pLG zbtD4F#&He2__1@XecRH#F(q1uRyW-?ZyooF*6@mXvl&)?SG2;*Fl;l`?ta1%E>mrO zN3`mz@?d1*_ToJ_C0g;S3@!Rv(d?9=Wz3R_WlK=^+Sg`V;okg^Xx&yxa*J!)xXA)Cp#L&#_R5YniOu*Y$q4qQ%5Urt=bdXXLX|TH?WI1-1DF>CVx=XaW zDhaRHmSXy|h895mL`E|p^+T)X#8xu5E{jbuB{qu@ zD=5x3R|Th%+Qpml0rRuYs;;d(7;}~D^Jj=6cQcEWFV|pez1~F_oDO#siHiR;DskDgL8pPgf z;NDh4dhQdgkxEj3we}d3mcWLX-7i`RS=pHlv|~qBUBz(PGoQ;HBQc zB3Nn5RzkUG+7@geE?Rr44z}1?jGvt9& zd&iQe_ex86`E&Wg)&kg%-Yr_amE>=UZFnnv!c0IvVSaE0Hg{CYy1Y#rqLHX*?W{V= zV!19q8ia_qS3N)|=`CGxN&v&dl^QMK(|DXA{ z`@ZhG)%zXTfj&fl2oM1xKm>>Y5vYnlOtf}a(v!QQ-@?bY_USpk0_V;mSu>HuIi!|K zXFA`z3+~L!s7E!tqJZ5=blCQYXdPRfG!;`99nYrYW#n$dfYwz9GX1I0HQdc`4Y^fFT{{Jq=?>pcg z{|DsPM;ur=_lz*L~wf6lM@eZIf+-Q@kEcf|9Qr&anx$?g6F_YUy| zG2{A$>-StAbpFt33U3Q11P_?1?sK9`bXYSL%xOR>R_^-0u?f6CycXVVnf*v7IGtWu zr%PU!ux0!s8`s#kE#1iu@O@}i({1y1Kml*VE9K2*cuzYR4p(p3W=d}ZQ}GH+m22|K zcZ~UIc|osrx)mHATRn%^Ctsx3sbyEyXjg6<9AiW}BDkw~B62+3up{2}3mXYah*m?xH2GgGfTx zED~t1@dCCbH$oU4Yahm9-Noq!D5lG8Jt`|Mg{p1&>w!hnHDyt(H)D0c;X})erF?a@ zovO!bSIvv<6m+l#yxh2gX|dV9mx1k(YHTmk9wQxo{r{j`ZPbfcwncq+2yFMQMASB; z_XWY|XthR*7c&=31VA&fHvp!)mTlS=Y|IZ9_f%`KSiu(rYrp;<^R1E(TRQf7mqVtQ z4x7c@9PRx5v6t`9vC z@&soBmjY)4`~8>vr~NnhUh$ps4S2r?JJE*-5CI}U1c(3;AOg!JaBvj*F_nxtp);48 z^T%FvaWGW&-qw9Pz;-cc=z;xJ%gVTcrdGy`GBx2~2GTT1pumcyo) z5}U={gJ7|e;buuT+6-BN0hm6kWTv-hY0)uP8REHdX#SU7i~m;1^7KPARi$g0(d4Ik zbD`rDtGf>(Vbzqrm5Br}^Qm583&i1B(KxQSF+G5^1558pgN&o-= literal 0 HcmV?d00001 diff --git a/.env b/.env index a97e897..8255943 100644 --- a/.env +++ b/.env @@ -1,12 +1,12 @@ POSTGRES_VERSION_TAG=15.3-alpine3.17 POSTGRES_PORT=5432 -POSTGRES_USER=postgres +POSTGRES_USERNAME=postgres POSTGRES_PASSWORD=postgres POSTGRES_DATABASE=records POSTGRES_BATCH_UPSERT_SIZE=1000 RABBITMQ_VERSION_TAG=3.12.10-management -RABBITMQ_USER=rabbitmq +RABBITMQ_USERNAME=rabbitmq 
RABBITMQ_PASSWORD=rabbitmq RABBITMQ_PORT=5672 RABBITMQ_WEBAPP_PORT=15672 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9d0242e..a4997c9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -36,13 +36,18 @@ jobs: echo "RABBITMQ_USER=$RABBITMQ_USER" >> $GITHUB_OUTPUT echo "RABBITMQ_PASSWORD=$RABBITMQ_PASSWORD" >> $GITHUB_OUTPUT echo "QUEUE_NAME=$QUEUE_NAME" >> $GITHUB_OUTPUT - test: + test-producer: needs: load-dotenv runs-on: ubuntu-latest - permissions: - pages: write - contents: write - id-token: write + env: + WATCH_FILE_PATTERNS: | + producer/**/*.py + producer/requirements-dev.txt + COVERAGE_FILE: .coverage_producer + WORKDIR: producer + outputs: + coverage-file-cache-path: ${{ steps.output-coverage-file.outputs.COVERAGE_FILE_CACHE_PATH }} + coverage-file-cache-key: ${{ steps.output-coverage-file.outputs.COVERAGE_FILE_CACHE_KEY }} services: rabbitmq: image: rabbitmq:${{ needs.load-dotenv.outputs.rabbitmq-version-tag }} @@ -63,18 +68,96 @@ jobs: with: python-version: '3.11' cache: 'pip' - cache-dependency-path: | - producer/requirements-dev.txt - consumer/requirements-dev.txt + cache-dependency-path: ${{env.WORKDIR}}/requirements-dev.txt + - uses: actions/cache@v2 + id: cache + with: + path: ${{env.COVERAGE_FILE}} + key: ${{ runner.os }}-coverage-producer-${{ hashFiles(env.WATCH_FILE_PATTERNS) }} + restore-keys: | + ${{ runner.os }}-coverage-producer- - name: Install dependencies + if: steps.cache.outputs.cache-hit != 'true' + working-directory: ${{env.WORKDIR}} + run: pip install -r requirements-dev.txt + - name: Run tests run: | - pip install -r producer/requirements-dev.txt - pip install -r consumer/requirements-dev.txt + coverage run -m pytest -v producer/tests + env: + POSTGRES_HOST: localhost + POSTGRES_PORT: ${{ needs.load-dotenv.outputs.postgres-port }} + POSTGRES_USER: ${{ needs.load-dotenv.outputs.postgres-user }} + POSTGRES_PASSWORD: ${{ needs.load-dotenv.outputs.postgres-password }} + POSTGRES_DATABASE: ${{ needs.load-dotenv.outputs.postgres-database }} + RABBITMQ_HOST: localhost + RABBITMQ_PORT: ${{ needs.load-dotenv.outputs.rabbitmq-port }} + RABBITMQ_USER: ${{ needs.load-dotenv.outputs.rabbitmq-user }} + RABBITMQ_PASSWORD: ${{ needs.load-dotenv.outputs.rabbitmq-password }} + QUEUE_NAME: ${{ needs.load-dotenv.outputs.queue-name }} + - name: Output coverage file + id: output-coverage-file + if: steps.cache.outputs.cache-hit != 'true' + run: | + echo "COVERAGE_FILE_CACHE_PATH=${{env.COVERAGE_FILE}}" >> $GITHUB_OUTPUT + echo "COVERAGE_FILE_CACHE_KEY=${{ runner.os }}-coverage-producer-${{ hashFiles(env.WATCH_FILE_PATTERNS) }}" >> $GITHUB_OUTPUT + test-consumer: + needs: load-dotenv + runs-on: ubuntu-latest + env: + WATCH_FILE_PATTERNS: | + consumer/**/*.py + consumer/requirements-dev.txt + COVERAGE_FILE: .coverage_consumer + WORKDIR: consumer + outputs: + coverage-file-cache-path: ${{ steps.output-coverage-file.outputs.COVERAGE_FILE_CACHE_PATH }} + coverage-file-cache-key: ${{ steps.output-coverage-file.outputs.COVERAGE_FILE_CACHE_KEY }} + services: + rabbitmq: + image: rabbitmq:${{ needs.load-dotenv.outputs.rabbitmq-version-tag }} + env: + RABBITMQ_DEFAULT_USER: ${{ needs.load-dotenv.outputs.rabbitmq-user }} + RABBITMQ_DEFAULT_PASS: ${{ needs.load-dotenv.outputs.rabbitmq-password }} + options: >- + --health-cmd "rabbitmq-diagnostics -q check_running" + --health-interval 5s + --health-timeout 30s + --health-retries 3 + ports: + - ${{ needs.load-dotenv.outputs.rabbitmq-port }}:5672 + postgres: + image: postgres:${{ 
needs.load-dotenv.outputs.postgres-version-tag }} + env: + POSTGRES_USER: ${{ needs.load-dotenv.outputs.postgres-user }} + POSTGRES_PASSWORD: ${{ needs.load-dotenv.outputs.postgres-password }} + POSTGRES_DB: ${{ needs.load-dotenv.outputs.postgres-database }} + options: >- + --health-cmd pg_isready + --health-interval 5s + --health-timeout 30s + --health-retries 3 + steps: + - name: Checkout + uses: actions/checkout@v4 + - uses: actions/setup-python@v4 + with: + python-version: '3.11' + cache: 'pip' + cache-dependency-path: ${{env.WORKDIR}}/requirements-dev.txt + - uses: actions/cache@v2 + id: cache + with: + path: ${{env.COVERAGE_FILE}} + key: ${{ runner.os }}-coverage-consumer-${{ hashFiles(env.WATCH_FILE_PATTERNS) }} + restore-keys: | + ${{ runner.os }}-coverage-consumer- + - name: Install dependencies + if: steps.cache.outputs.cache-hit != 'true' + working-directory: ${{env.WORKDIR}} + run: pip install -r requirements-dev.txt - name: Run tests run: | - coverage run -m pytest -v producer/tests consumer/tests - coverage html - coverage report -m + coverage run -m pytest -v consumer/tests env: POSTGRES_HOST: localhost POSTGRES_PORT: ${{ needs.load-dotenv.outputs.postgres-port }} @@ -86,6 +169,45 @@ jobs: RABBITMQ_USER: ${{ needs.load-dotenv.outputs.rabbitmq-user }} RABBITMQ_PASSWORD: ${{ needs.load-dotenv.outputs.rabbitmq-password }} QUEUE_NAME: ${{ needs.load-dotenv.outputs.queue-name }} + - name: Output coverage file + id: output-coverage-file + if: steps.cache.outputs.cache-hit != 'true' + run: | + echo "COVERAGE_FILE_CACHE_PATH=${{env.COVERAGE_FILE}}" >> $GITHUB_OUTPUT + echo "COVERAGE_FILE_CACHE_KEY=${{ runner.os }}-coverage-consumer-${{ hashFiles(env.WATCH_FILE_PATTERNS) }}" >> $GITHUB_OUTPUT + coverage: + needs: [test-producer, test-consumer] + runs-on: ubuntu-latest + permissions: + contents: write + id-token: write + pull-requests: write + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Retrieve producer coverage file + uses: actions/cache@v2 + id: producer-cache + with: + path: ${{ needs.test-producer.outputs.coverage-file-cache-path }} + key: ${{ needs.test-producer.outputs.coverage-file-cache-key }} + restore-keys: | + ${{ runner.os }}-coverage-producer- + - name: Retrieve consumer coverage file + uses: actions/cache@v2 + id: consumer-cache + with: + path: ${{ needs.test-consumer.outputs.coverage-file-cache-path }} + key: ${{ needs.test-consumer.outputs.coverage-file-cache-key }} + restore-keys: | + ${{ runner.os }}-coverage-consumer- + - name: Combine coverage files + run: | + coverage combine ${{ needs.test-producer.outputs.coverage-file-cache-path }} ${{ needs.test-consumer.outputs.coverage-file-cache-path }} + - name: Generate coverage report + run: | + coverage report -m + coverage html - name: upload artifact uses: actions/upload-pages-artifact@v1 with: diff --git a/Makefile b/Makefile index 8393a90..f00e9cb 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,8 @@ +include .env + +POSTGRES_HOST=localhost +RABBITMQ_HOST=localhost + build: docker compose build up: @@ -13,5 +18,29 @@ export_requirements: poetry export -f requirements.txt --output requirements.txt --without-hashes && \ cd ../consumer && \ poetry export -f requirements.txt --output requirements.txt --without-hashes -test_env: +setup_test_env: docker compose -f docker-compose.test.yml up -d +test_producer: + export POSTGRES_HOST=localhost && \ + export POSTGRES_PORT=$(POSTGRES_PORT) && \ + export POSTGRES_USERNAME=$(POSTGRES_USERNAME) && \ + export POSTGRES_PASSWORD=$(POSTGRES_PASSWORD) && \ + 
export POSTGRES_DATABASE=$(POSTGRES_DB) && \ + export RABBITMQ_HOST=localhost && \ + export RABBITMQ_PORT=$(RABBITMQ_PORT) && \ + export RABBITMQ_USERNAME=$(RABBITMQ_USERNAME) && \ + export RABBITMQ_PASSWORD=$(RABBITMQ_PASSWORD) && \ + export QUEUE_NAME=$(QUEUE_NAME) && \ + COVERAGE_FILE=.coverage_producer coverage run -m pytest -vx producer/tests +test_consumer: + export POSTGRES_HOST=localhost && \ + export POSTGRES_PORT=$(POSTGRES_PORT) && \ + export POSTGRES_USERNAME=$(POSTGRES_USERNAME) && \ + export POSTGRES_PASSWORD=$(POSTGRES_PASSWORD) && \ + export POSTGRES_DATABASE=$(POSTGRES_DB) && \ + export RABBITMQ_HOST=localhost && \ + export RABBITMQ_PORT=$(RABBITMQ_PORT) && \ + export RABBITMQ_USERNAME=$(RABBITMQ_USERNAME) && \ + export RABBITMQ_PASSWORD=$(RABBITMQ_PASSWORD) && \ + export QUEUE_NAME=$(QUEUE_NAME) && \ + COVERAGE_FILE=.coverage_consumer coverage run -m pytest -vx consumer/tests diff --git a/README.md b/README.md index fc212c1..dc121e5 100644 --- a/README.md +++ b/README.md @@ -1 +1,4 @@ -# producer_consumer_csv \ No newline at end of file +# producer_consumer_csv + +![Build Status](https://github.com/github/docs/actions/workflows/test.yml/badge.svg) +![Code Coverage](./coverage.svg) diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/consumer/reuqirements-dev.txt b/consumer/reuqirements-dev.txt deleted file mode 100644 index 6ed92fd..0000000 --- a/consumer/reuqirements-dev.txt +++ /dev/null @@ -1,2 +0,0 @@ -pika==1.3.2 ; python_version >= "3.11" and python_version < "4.0" -psycopg2-binary==2.9.9 ; python_version >= "3.11" and python_version < "4.0" diff --git a/consumer/src/adapters/fetch_filenames/rabbitmq.py b/consumer/src/adapters/fetch_filenames/rabbitmq.py index 1b2b89e..f712f21 100644 --- a/consumer/src/adapters/fetch_filenames/rabbitmq.py +++ b/consumer/src/adapters/fetch_filenames/rabbitmq.py @@ -1,5 +1,5 @@ from contextlib import contextmanager -from usecases import FetchFilenameClient +from ...usecases import FetchFilenameClient import pika from pika.adapters.blocking_connection import BlockingChannel from pika.spec import Basic, BasicProperties diff --git a/consumer/src/adapters/file_parse_iot_records/csv.py b/consumer/src/adapters/file_parse_iot_records/csv.py index cc562ec..072fb20 100644 --- a/consumer/src/adapters/file_parse_iot_records/csv.py +++ b/consumer/src/adapters/file_parse_iot_records/csv.py @@ -3,8 +3,8 @@ from decimal import Decimal from typing import Iterator, Optional, overload, Sequence from typing_extensions import override -from entities import IOTRecord -from usecases import FileParseIOTRecordsClient +from ...entities import IOTRecord +from ...usecases import FileParseIOTRecordsClient import csv import logging diff --git a/consumer/src/adapters/upsert_iot_records/postgres.py b/consumer/src/adapters/upsert_iot_records/postgres.py index 1e19758..266b5df 100644 --- a/consumer/src/adapters/upsert_iot_records/postgres.py +++ b/consumer/src/adapters/upsert_iot_records/postgres.py @@ -4,8 +4,8 @@ from typing_extensions import override import psycopg2 from psycopg2.extensions import connection -from usecases import UpsertIOTRecordsClient -from entities import IOTRecord +from ...usecases import UpsertIOTRecordsClient +from ...entities import IOTRecord from collections.abc import Callable T = TypeVar("T") diff --git a/consumer/src/deployments/scripts/main.py b/consumer/src/deployments/scripts/main.py index 21c76ee..ab51d84 100644 --- a/consumer/src/deployments/scripts/main.py +++ 
b/consumer/src/deployments/scripts/main.py @@ -1,10 +1,10 @@ -from adapters.fetch_filenames.rabbitmq import RabbitMQFetchFilenamesClient -from adapters.file_parse_iot_records.csv import CSVParseIOTRecordsClient -from adapters.upsert_iot_records.postgres import PostgresUpsertIOTRecordsClient -from config import RabbitMQConfig, PostgresConfig, CSVParserConfig +from ...adapters.fetch_filenames.rabbitmq import RabbitMQFetchFilenamesClient +from ...adapters.file_parse_iot_records.csv import CSVParseIOTRecordsClient +from ...adapters.upsert_iot_records.postgres import PostgresUpsertIOTRecordsClient +from .config import RabbitMQConfig, PostgresConfig, CSVParserConfig from setup_logging import setup_logging import logging -from entities import IOTRecord +from ...entities import IOTRecord setup_logging() diff --git a/consumer/src/usecases/file_parse_iot_records.py b/consumer/src/usecases/file_parse_iot_records.py index 5005f4b..ca2276c 100644 --- a/consumer/src/usecases/file_parse_iot_records.py +++ b/consumer/src/usecases/file_parse_iot_records.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from typing import Iterator, overload, Sequence -from entities import IOTRecord +from ..entities import IOTRecord class FileParseIOTRecordsClient(ABC): diff --git a/consumer/src/usecases/upsert_iot_records.py b/consumer/src/usecases/upsert_iot_records.py index 7c6b5f7..90a326c 100644 --- a/consumer/src/usecases/upsert_iot_records.py +++ b/consumer/src/usecases/upsert_iot_records.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from typing import overload, Sequence -from entities import IOTRecord +from ..entities import IOTRecord class UpsertIOTRecordsClient(ABC): diff --git a/consumer/tests/test_adapters/__init__.py b/consumer/tests/test_adapters/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/consumer/tests/test_adapters/test_fetch_filenames/__init__.py b/consumer/tests/test_adapters/test_fetch_filenames/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/__init__.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/conftest.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/conftest.py new file mode 100644 index 0000000..e69de29 diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_helloworld.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_helloworld.py new file mode 100644 index 0000000..68c495a --- /dev/null +++ b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_helloworld.py @@ -0,0 +1,2 @@ +def test_helloworld(): + assert True diff --git a/consumer/tests/test_adapters/test_file_parse_iot_records/__init__.py b/consumer/tests/test_adapters/test_file_parse_iot_records/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/__init__.py b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/conftest.py b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/conftest.py new file mode 100644 index 0000000..e69de29 diff --git a/consumer/tests/test_adapters/test_upsert_iot_records/__init__.py 
b/consumer/tests/test_adapters/test_upsert_iot_records/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/__init__.py b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/conftest.py b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/conftest.py new file mode 100644 index 0000000..e69de29 diff --git a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_conn.py b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_conn.py new file mode 100644 index 0000000..e69de29 diff --git a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_upsert.py b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_upsert.py new file mode 100644 index 0000000..e69de29 diff --git a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_successful_upsert.py b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_successful_upsert.py new file mode 100644 index 0000000..1d443f1 --- /dev/null +++ b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_successful_upsert.py @@ -0,0 +1 @@ +from src.adapters.upsert_iot_records.postgres import PostgresUpsertIOTRecordsClient diff --git a/docker-compose.test.yml b/docker-compose.test.yml index 53a4dbc..46275ec 100644 --- a/docker-compose.test.yml +++ b/docker-compose.test.yml @@ -10,7 +10,7 @@ services: POSTGRES_VERSION_TAG: ${POSTGRES_VERSION_TAG} environment: POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} - POSTGRES_USER: ${POSTGRES_USER} + POSTGRES_USER: ${POSTGRES_USERNAME} POSTGRES_DB: ${POSTGRES_DATABASE} ports: - ${POSTGRES_PORT}:5432 @@ -19,7 +19,7 @@ services: image: rabbitmq:${RABBITMQ_VERSION_TAG} container_name: records_rabbitmq environment: - RABBITMQ_DEFAULT_USER: ${RABBITMQ_USER} + RABBITMQ_DEFAULT_USER: ${RABBITMQ_USERNAME} RABBITMQ_DEFAULT_PASS: ${RABBITMQ_PASSWORD} ports: - ${RABBITMQ_WEBAPP_PORT}:15672 diff --git a/docker-compose.yml b/docker-compose.yml index 6d94f27..a91bc37 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,7 +10,7 @@ services: POSTGRES_VERSION_TAG: ${POSTGRES_VERSION_TAG} environment: POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} - POSTGRES_USER: ${POSTGRES_USER} + POSTGRES_USER: ${POSTGRES_USERNAME} POSTGRES_DB: ${POSTGRES_DATABASE} ports: - ${POSTGRES_PORT}:5432 @@ -19,7 +19,7 @@ services: image: rabbitmq:${RABBITMQ_VERSION_TAG} container_name: records_rabbitmq environment: - RABBITMQ_DEFAULT_USER: ${RABBITMQ_USER} + RABBITMQ_DEFAULT_USER: ${RABBITMQ_USERNAME} RABBITMQ_DEFAULT_PASS: ${RABBITMQ_PASSWORD} ports: - ${RABBITMQ_WEBAPP_PORT}:15672 diff --git a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/conftest.py b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/conftest.py index 165a300..80b13d9 100644 --- a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/conftest.py +++ b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/conftest.py @@ -1,40 +1,32 @@ from src.adapters.publish_filenames.rabbitmq import RabbitMQPublishFilenamesClient +from src.deployments.script.config import RabbitMQConfig import pika import pytest from pytest import MonkeyPatch -@pytest.fixture(scope="session") -def rabbitmq_config() -> dict: - return { - "host": "localhost", - "port": 
5672, - "credentials_service": lambda: ("rabbitmq", "rabbitmq"), - "queue": "filenames", - } - - @pytest.fixture(scope="function") -def rabbitmq_publish_filenames_client( - rabbitmq_config: dict, -) -> RabbitMQPublishFilenamesClient: - return RabbitMQPublishFilenamesClient(**rabbitmq_config) +def rabbitmq_publish_filenames_client() -> RabbitMQPublishFilenamesClient: + return RabbitMQPublishFilenamesClient( + host=RabbitMQConfig.HOST, + port=RabbitMQConfig.PORT, + credentials_service=lambda: (RabbitMQConfig.USERNAME, RabbitMQConfig.PASSWORD), + queue=RabbitMQConfig.QUEUE, + ) @pytest.fixture(scope="function") -def raw_rabbitmq_pika_conn_config( - rabbitmq_config: dict, -) -> tuple[pika.BaseConnection, str]: +def raw_rabbitmq_pika_conn_config() -> tuple[pika.BaseConnection, str]: pika_conn = pika.BlockingConnection( pika.ConnectionParameters( - host=rabbitmq_config["host"], - port=rabbitmq_config["port"], + host=RabbitMQConfig.HOST, + port=RabbitMQConfig.PORT, credentials=pika.PlainCredentials( - *rabbitmq_config["credentials_service"]() + RabbitMQConfig.USERNAME, RabbitMQConfig.PASSWORD ), ) ) - return pika_conn, rabbitmq_config["queue"] + return pika_conn, RabbitMQConfig.QUEUE @pytest.fixture(scope="function", autouse=True) diff --git a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_conn.py b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_conn.py index 43cecaf..7f161a4 100644 --- a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_conn.py +++ b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_conn.py @@ -1,6 +1,7 @@ import pytest from .utils import random_filenames from src.adapters.publish_filenames.rabbitmq import RabbitMQPublishFilenamesClient +from src.deployments.script.config import RabbitMQConfig import pika from pytest import MonkeyPatch @@ -70,14 +71,14 @@ def mocked_failed_conn( @pytest.mark.smoke @pytest.mark.parametrize("filename", random_filenames()) def test_publish_single_wrong_credentials( - rabbitmq_config: dict, raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], filename: str, ): - copied_rabbitmq_config = rabbitmq_config.copy() - copied_rabbitmq_config["credentials_service"] = lambda: ("wrong", "wrong") rabbitmq_publish_filenames_client = RabbitMQPublishFilenamesClient( - **copied_rabbitmq_config + host=RabbitMQConfig.HOST, + port=RabbitMQConfig.PORT, + credentials_service=lambda: ("wrong", "wrong"), + queue=RabbitMQConfig.QUEUE, ) with pytest.raises(Exception) as e: @@ -95,14 +96,14 @@ def test_publish_single_wrong_credentials( @pytest.mark.smoke @pytest.mark.parametrize("filename", random_filenames()) def test_publish_single_wrong_host( - rabbitmq_config: dict, raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], filename: str, ): - copied_rabbitmq_config = rabbitmq_config.copy() - copied_rabbitmq_config["host"] = "wrong" rabbitmq_publish_filenames_client = RabbitMQPublishFilenamesClient( - **copied_rabbitmq_config + host="wrong", + port=RabbitMQConfig.PORT, + credentials_service=lambda: (RabbitMQConfig.USERNAME, RabbitMQConfig.PASSWORD), + queue=RabbitMQConfig.QUEUE, ) with pytest.raises(Exception) as e: diff --git a/producer/tests/test_deployments/test_main/conftest.py b/producer/tests/test_deployments/test_main/conftest.py index 280f24c..06c3090 100644 --- a/producer/tests/test_deployments/test_main/conftest.py +++ b/producer/tests/test_deployments/test_main/conftest.py @@ -4,44 +4,29 @@ import pytest from pytest import 
TempdirFactory import pathlib -import os - - -@pytest.fixture(scope="session") -def mock_rabbitmq_config() -> Type[RabbitMQConfig]: - class MockedRabbitMQConfig(RabbitMQConfig): - HOST = "localhost" - PORT = 5672 - USERNAME = "rabbitmq" - PASSWORD = "rabbitmq" - QUEUE = "filenames" - - return MockedRabbitMQConfig @pytest.fixture(scope="session") def mock_project_config(tmpdir_factory: TempdirFactory) -> None: class MockedProjectConfig(ProjectConfig): TARGET_FILE_DIR = str(tmpdir_factory.mktemp("artifact")) - TARGET_FILE_EXTENSION = ".csv" + TARGET_FILE_EXTENSION = ProjectConfig.TARGET_FILE_EXTENSION return MockedProjectConfig @pytest.fixture(scope="function") -def raw_rabbitmq_pika_conn_config( - mock_rabbitmq_config: Type[RabbitMQConfig], -) -> tuple[pika.BaseConnection, str]: +def raw_rabbitmq_pika_conn_config() -> tuple[pika.BaseConnection, str]: pika_conn = pika.BlockingConnection( pika.ConnectionParameters( - host=mock_rabbitmq_config.HOST, - port=mock_rabbitmq_config.PORT, + host=RabbitMQConfig.HOST, + port=RabbitMQConfig.PORT, credentials=pika.PlainCredentials( - mock_rabbitmq_config.USERNAME, mock_rabbitmq_config.PASSWORD + RabbitMQConfig.USERNAME, RabbitMQConfig.PASSWORD ), ) ) - return pika_conn, mock_rabbitmq_config.QUEUE + return pika_conn, RabbitMQConfig.QUEUE @pytest.fixture(scope="function", autouse=True) diff --git a/producer/tests/test_deployments/test_main/test_main_function_failed.py b/producer/tests/test_deployments/test_main/test_main_function_failed.py index 573d1ef..27a18e9 100644 --- a/producer/tests/test_deployments/test_main/test_main_function_failed.py +++ b/producer/tests/test_deployments/test_main/test_main_function_failed.py @@ -1,5 +1,5 @@ from src.deployments.script.main import main -from src.deployments.script.config import ProjectConfig, RabbitMQConfig +from src.deployments.script.config import ProjectConfig from typing import Type import pytest from .utils import random_csv_filenames @@ -12,7 +12,6 @@ [random_csv_filenames() for _ in range(5)], ) def test_main_flow_has_failed_files( - mock_rabbitmq_config: Type[RabbitMQConfig], mock_project_config: Type[ProjectConfig], random_csv_filenames: list[str], monkeypatch: MonkeyPatch, @@ -32,11 +31,6 @@ def test_main_flow_has_failed_files( "TARGET_FILE_EXTENSION", mock_project_config.TARGET_FILE_EXTENSION, ) - monkeypatch.setattr(RabbitMQConfig, "HOST", mock_rabbitmq_config.HOST) - monkeypatch.setattr(RabbitMQConfig, "PORT", mock_rabbitmq_config.PORT) - monkeypatch.setattr(RabbitMQConfig, "USERNAME", mock_rabbitmq_config.USERNAME) - monkeypatch.setattr(RabbitMQConfig, "PASSWORD", mock_rabbitmq_config.PASSWORD) - monkeypatch.setattr(RabbitMQConfig, "QUEUE", mock_rabbitmq_config.QUEUE) monkeypatch.setattr( "src.adapters.publish_filenames.rabbitmq.RabbitMQPublishFilenamesClient.publish", diff --git a/producer/tests/test_deployments/test_main/test_main_function_successful.py b/producer/tests/test_deployments/test_main/test_main_function_successful.py index fe8f72a..2e5ba8a 100644 --- a/producer/tests/test_deployments/test_main/test_main_function_successful.py +++ b/producer/tests/test_deployments/test_main/test_main_function_successful.py @@ -1,5 +1,5 @@ from src.deployments.script.main import main -from src.deployments.script.config import ProjectConfig, RabbitMQConfig +from src.deployments.script.config import ProjectConfig from typing import Type import pytest from .utils import random_csv_filenames @@ -12,7 +12,6 @@ [random_csv_filenames() for _ in range(5)], ) def test_main_flow_no_failed_files( - 
mock_rabbitmq_config: Type[RabbitMQConfig], mock_project_config: Type[ProjectConfig], random_csv_filenames: list[str], monkeypatch: MonkeyPatch, @@ -32,11 +31,6 @@ def test_main_flow_no_failed_files( "TARGET_FILE_EXTENSION", mock_project_config.TARGET_FILE_EXTENSION, ) - monkeypatch.setattr(RabbitMQConfig, "HOST", mock_rabbitmq_config.HOST) - monkeypatch.setattr(RabbitMQConfig, "PORT", mock_rabbitmq_config.PORT) - monkeypatch.setattr(RabbitMQConfig, "USERNAME", mock_rabbitmq_config.USERNAME) - monkeypatch.setattr(RabbitMQConfig, "PASSWORD", mock_rabbitmq_config.PASSWORD) - monkeypatch.setattr(RabbitMQConfig, "QUEUE", mock_rabbitmq_config.QUEUE) with caplog.at_level("INFO"): assert main() is None From be39f26966f7f0a8d12394bb8f00c59eae680256 Mon Sep 17 00:00:00 2001 From: alexau Date: Sat, 2 Dec 2023 18:55:47 +0800 Subject: [PATCH 14/36] Updated the coverage step --- .github/workflows/test.yml | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a4997c9..eaced92 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -64,11 +64,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 - - uses: actions/setup-python@v4 - with: - python-version: '3.11' - cache: 'pip' - cache-dependency-path: ${{env.WORKDIR}}/requirements-dev.txt - uses: actions/cache@v2 id: cache with: @@ -76,6 +71,12 @@ jobs: key: ${{ runner.os }}-coverage-producer-${{ hashFiles(env.WATCH_FILE_PATTERNS) }} restore-keys: | ${{ runner.os }}-coverage-producer- + - uses: actions/setup-python@v4 + if: steps.cache.outputs.cache-hit != 'true' + with: + python-version: '3.11' + cache: 'pip' + cache-dependency-path: ${{env.WORKDIR}}/requirements-dev.txt - name: Install dependencies if: steps.cache.outputs.cache-hit != 'true' working-directory: ${{env.WORKDIR}} @@ -139,11 +140,6 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 - - uses: actions/setup-python@v4 - with: - python-version: '3.11' - cache: 'pip' - cache-dependency-path: ${{env.WORKDIR}}/requirements-dev.txt - uses: actions/cache@v2 id: cache with: @@ -151,6 +147,12 @@ jobs: key: ${{ runner.os }}-coverage-consumer-${{ hashFiles(env.WATCH_FILE_PATTERNS) }} restore-keys: | ${{ runner.os }}-coverage-consumer- + - uses: actions/setup-python@v4 + if: steps.cache.outputs.cache-hit != 'true' + with: + python-version: '3.11' + cache: 'pip' + cache-dependency-path: ${{env.WORKDIR}}/requirements-dev.txt - name: Install dependencies if: steps.cache.outputs.cache-hit != 'true' working-directory: ${{env.WORKDIR}} @@ -201,6 +203,11 @@ jobs: key: ${{ needs.test-consumer.outputs.coverage-file-cache-key }} restore-keys: | ${{ runner.os }}-coverage-consumer- + - uses: actions/setup-python@v4 + with: + python-version: '3.11' + - name: Install dependencies + run: pip install coverage - name: Combine coverage files run: | coverage combine ${{ needs.test-producer.outputs.coverage-file-cache-path }} ${{ needs.test-consumer.outputs.coverage-file-cache-path }} From f4bca34487a948b78c0c2a3d59f09283a66f0e3b Mon Sep 17 00:00:00 2001 From: alexau Date: Sat, 2 Dec 2023 18:57:10 +0800 Subject: [PATCH 15/36] Solving the cache hit problem --- .github/workflows/test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index eaced92..5281343 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -82,6 +82,7 @@ jobs: working-directory: ${{env.WORKDIR}} run: pip install -r requirements-dev.txt - 
name: Run tests + if: steps.cache.outputs.cache-hit != 'true' run: | coverage run -m pytest -v producer/tests env: @@ -158,6 +159,7 @@ jobs: working-directory: ${{env.WORKDIR}} run: pip install -r requirements-dev.txt - name: Run tests + if: steps.cache.outputs.cache-hit != 'true' run: | coverage run -m pytest -v consumer/tests env: From 770eda2e62a05d4bd76582ac851b6e31a92bbaa9 Mon Sep 17 00:00:00 2001 From: alexau Date: Sat, 2 Dec 2023 18:58:57 +0800 Subject: [PATCH 16/36] Debugging the coverage file --- .github/workflows/test.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5281343..abff351 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -98,7 +98,6 @@ jobs: QUEUE_NAME: ${{ needs.load-dotenv.outputs.queue-name }} - name: Output coverage file id: output-coverage-file - if: steps.cache.outputs.cache-hit != 'true' run: | echo "COVERAGE_FILE_CACHE_PATH=${{env.COVERAGE_FILE}}" >> $GITHUB_OUTPUT echo "COVERAGE_FILE_CACHE_KEY=${{ runner.os }}-coverage-producer-${{ hashFiles(env.WATCH_FILE_PATTERNS) }}" >> $GITHUB_OUTPUT @@ -175,7 +174,6 @@ jobs: QUEUE_NAME: ${{ needs.load-dotenv.outputs.queue-name }} - name: Output coverage file id: output-coverage-file - if: steps.cache.outputs.cache-hit != 'true' run: | echo "COVERAGE_FILE_CACHE_PATH=${{env.COVERAGE_FILE}}" >> $GITHUB_OUTPUT echo "COVERAGE_FILE_CACHE_KEY=${{ runner.os }}-coverage-consumer-${{ hashFiles(env.WATCH_FILE_PATTERNS) }}" >> $GITHUB_OUTPUT From 35fd2dda2985be105aa920457fa7fa6f931f9cfd Mon Sep 17 00:00:00 2001 From: alexau Date: Sat, 2 Dec 2023 19:00:26 +0800 Subject: [PATCH 17/36] Debugging the permissions --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index abff351..0908043 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -183,7 +183,7 @@ jobs: permissions: contents: write id-token: write - pull-requests: write + pages: write steps: - name: Checkout uses: actions/checkout@v4 From dec5d53ffe627d0cfad50f508e6774eb64a94694 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 2 Dec 2023 11:01:40 +0000 Subject: [PATCH 18/36] Updated coverage.svg --- coverage.svg | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/coverage.svg b/coverage.svg index 0fa9649..b3e8ba0 100644 --- a/coverage.svg +++ b/coverage.svg @@ -9,13 +9,13 @@ - + coverage coverage - 98% - 98% + 88% + 88% From 550a3a126b37bf6131c35fc4097ae631a4c3f02e Mon Sep 17 00:00:00 2001 From: alexau Date: Sat, 2 Dec 2023 19:05:08 +0800 Subject: [PATCH 19/36] Ignore the coverage on test files --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0908043..a2872b9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -214,7 +214,7 @@ jobs: - name: Generate coverage report run: | coverage report -m - coverage html + coverage html --omit="*/test*" - name: upload artifact uses: actions/upload-pages-artifact@v1 with: From 1290ec4ff765aa719d916d7e9db0e5e60bbcaa21 Mon Sep 17 00:00:00 2001 From: alexau Date: Sat, 2 Dec 2023 19:11:43 +0800 Subject: [PATCH 20/36] Updated the coverage report scope to remove test files --- .coverage | Bin 69632 -> 0 bytes .github/workflows/test.yml | 2 +- Makefile | 4 ++++ 3 files changed, 5 insertions(+), 1 deletion(-) delete mode 100644 .coverage diff --git a/.coverage b/.coverage 
deleted file mode 100644 index c091ac8000814f81f92d84317a50a12355faf859..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 69632 zcmeI53v3+MbwGD^XJ`NO_|=NyZzzf)DSlQIMbS=byRt2)svSg@19} zYG+mw9Z|~AL7fV5ouozE6ser1hETw0kTz`^CyiwU4ca2E(zI~w08ZQ{u@nS$mDs4` zMh@M3-+nkNE-CNwqsX}n^4^`_z4x8>nfKnz&fI<1oe5J@#`R1}HI-pUjYAL|zpW?^ zhr_(FDJ`RnCz6_yNRK7rs+mYnDCVSA*gI=zV|b#2b#SV~9S8aG_$`_k zgQHAs0!&P05-Bw^tvswv_wya{-c0SdnOgxyw8TUj_fUHBhxaNOZCuM}>9}U(qUcGC z_4<1sJ>~a=>+6NbG@d3k6NkS*Noruld{Xv|gQ2*d8N=vf88scB)Qo;bEv7K8L!8IW zTxv6VDq`wNGLh!VOdAO^p{Et?xE9Zv+F02H_9L{y2jJOL1GXN*1U-~HGVjYqXa3+a zN9K3WA3P4hrS$^V`2*249L!@*XHzjiemI+&!pwmqa*j=Xu_G5d*3|Qfeb0GQCNJmH zTaujx*arGia2e&U?Zm^b=780yQ$)uu> z^D2W!E4j>p(rm9Ykpc`ggB4BD^E*PM*R-@@un>b1OX{)Gvc^k{p+N_pnGg{b`(o`dxDKsQYHIjuR=3B`^9m#Kj>b+Mtj0Buk@w8=pps-fyo>UENg^X?`rt3*fP3O)HaKs)1 zd?pNq$FT6Mbj!RAV;XHt9PQ>Y6;3YL%sf|boz##}oSHQ=P4QVU_{ z7o8=AkZB;6iW5UQUpm4F@jfJYeIN%fVQVLSe6bU&r4Yf+z)c*#Up_Ap%5z z2oM1xKm>>Y5g-CYfCvx)B0vP*e*|2DQ}E#U-^u>l!7hQI4-p^&M1Tko0U|&IhyW2F z0z`la5CI}^eG&*dU9GHei^o>Y5g-CYfCvx)B2XX@bhQeFSpcWs)$Gep0=QuZy#F6Qwe%Z+0xi-+fCvx) zB0vO)01+SpM1Tko0U|&I-cJN-x}1<4fqeoK#d7NXls-xe5g-CYfCvx)B0vO)01+Sp zM1TkofnQ4kvMySx5Si3dT0~81$0N7v@oY*>Y z5g-CYfCvx)BJe&W;Btk0c>RAg^bH4loBa%)0r+3+CHC*^U)Xc(E9{Hxuh{30b@^5K`|=C&bMjZ@FUV)*Gx8tFpO8N$ zoAO~>Y5g-CYpke|IqoUy3 z(l+<<*WSAIe`daX)6O4Ue&!##{X1aoy4JaukG+1QGy>n{mJ9NB&~43UUii{0-@EkM zv7>=uSYE&3JpGc~2jAN2PDbZGb?MF9ekMs_F8|p!C8Jhdn$>*N`q)_9S{9GqoYM1J8 z>sCRkMI8~O8q|Y=#83}7C5Xc(_X|=8b)O&wQEv%K0o1*My-ASVs5=EoM7>dvT&Oz)$%$GKBms51fY<*=Wy`_tWV>KwKgzbUUbc}n!`ME= zB>7$WZTaW&PvsxUFT?o$y!_8F#(zfsTls1EDf!RkKbD`6KQ3GBm+VdU8he%ffV~K_ z2mi{x!M?`+j{P+|%btY!gHN&3>```tWm%d{u!q=vY+uENoDLuYM1Tko0U|&IhyW2F z0z`la5P|ER0FGCMwo$HHcW~7*!d3Hjt~LyF)wGSP^+Q~Rw{o>E!d2rSR}BMP)%SB% z*T+@u7OrY~xnezB$=zIqHggs1;wrF-D}N_fzKvXYJGk;FTuJR*x!brBTe)(zaOG@v z!*qeLfiH|UakXPTS0iDrwy)!AxRI-E4O|V?bG5aOt4J+ZgEd?YFs}M#uKGe;!CU|$ z>kV+#iHu1>-q0`NmHi}$zxVTcBI6>fR$Ls%&n(Ms~QT#-J2oM1x zKm>>Y5g-CYfCvx)B0vO)z)}S8`ag~Tmm);lhyW2F0z`la5CI}U1c(3;AOb{y2weXJ z`0xMa*B$IKdyD-X?&815e#pKLPXYK(b{_8J|0i~ieU<$^JPY7yxSRhk*k{?Fuurqc z;fVl`vXg9@nM`MgnFe?D-_P!0cfqd+cC%aAP3#6X%m(3?1e;hp+rS!G4GS=d3Gl3d zU&wEOJNgg-B0vO)01+SpM1Tko0U|&IhyW2-Jp!&#c;dZl2Z|9C+ffXo*oI;V#a0v% z6oV)RQ1qkdL$L)#FNz)%-6%Gr=t8jxMJI}lC^}FmDB4l9p=d?Xf}$D41{6&w)}sic zScjq!MFWa@6m=+SQPiMdC}b2N6hRaL6n+#w6kZe_6cP$I3K4}1g%bsT{=e%n$7aWA zhwF^=hV;19Bkv2%xxWif^?NGxdDm|_f8@*x|0m3Ne=K+%r^S0+7kw9flimkB|KYhk zbVKk$a5mT$Xz@SiA9s(6-wiw;IKsZn4po{;EfOD`YgI#$pLG zbtD4F#&He2__1@XecRH#F(q1uRyW-?ZyooF*6@mXvl&)?SG2;*Fl;l`?ta1%E>mrO zN3`mz@?d1*_ToJ_C0g;S3@!Rv(d?9=Wz3R_WlK=^+Sg`V;okg^Xx&yxa*J!)xXA)Cp#L&#_R5YniOu*Y$q4qQ%5Urt=bdXXLX|TH?WI1-1DF>CVx=XaW zDhaRHmSXy|h895mL`E|p^+T)X#8xu5E{jbuB{qu@ zD=5x3R|Th%+Qpml0rRuYs;;d(7;}~D^Jj=6cQcEWFV|pez1~F_oDO#siHiR;DskDgL8pPgf z;NDh4dhQdgkxEj3we}d3mcWLX-7i`RS=pHlv|~qBUBz(PGoQ;HBQc zB3Nn5RzkUG+7@geE?Rr44z}1?jGvt9& zd&iQe_ex86`E&Wg)&kg%-Yr_amE>=UZFnnv!c0IvVSaE0Hg{CYy1Y#rqLHX*?W{V= zV!19q8ia_qS3N)|=`CGxN&v&dl^QMK(|DXA{ z`@ZhG)%zXTfj&fl2oM1xKm>>Y5vYnlOtf}a(v!QQ-@?bY_USpk0_V;mSu>HuIi!|K zXFA`z3+~L!s7E!tqJZ5=blCQYXdPRfG!;`99nYrYW#n$dfYwz9GX1I0HQdc`4Y^fFT{{Jq=?>pcg z{|DsPM;ur=_lz*L~wf6lM@eZIf+-Q@kEcf|9Qr&anx$?g6F_YUy| zG2{A$>-StAbpFt33U3Q11P_?1?sK9`bXYSL%xOR>R_^-0u?f6CycXVVnf*v7IGtWu zr%PU!ux0!s8`s#kE#1iu@O@}i({1y1Kml*VE9K2*cuzYR4p(p3W=d}ZQ}GH+m22|K zcZ~UIc|osrx)mHATRn%^Ctsx3sbyEyXjg6<9AiW}BDkw~B62+3up{2}3mXYah*m?xH2GgGfTx zED~t1@dCCbH$oU4Yahm9-Noq!D5lG8Jt`|Mg{p1&>w!hnHDyt(H)D0c;X})erF?a@ zovO!bSIvv<6m+l#yxh2gX|dV9mx1k(YHTmk9wQxo{r{j`ZPbfcwncq+2yFMQMASB; 
z_XWY|XthR*7c&=31VA&fHvp!)mTlS=Y|IZ9_f%`KSiu(rYrp;<^R1E(TRQf7mqVtQ z4x7c@9PRx5v6t`9vC z@&soBmjY)4`~8>vr~NnhUh$ps4S2r?JJE*-5CI}U1c(3;AOg!JaBvj*F_nxtp);48 z^T%FvaWGW&-qw9Pz;-cc=z;xJ%gVTcrdGy`GBx2~2GTT1pumcyo) z5}U={gJ7|e;buuT+6-BN0hm6kWTv-hY0)uP8REHdX#SU7i~m;1^7KPARi$g0(d4Ik zbD`rDtGf>(Vbzqrm5Br}^Qm583&i1B(KxQSF+G5^1558pgN&o-= diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a2872b9..ce63658 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -214,7 +214,7 @@ jobs: - name: Generate coverage report run: | coverage report -m - coverage html --omit="*/test*" + coverage html --omit="*/tests/*" - name: upload artifact uses: actions/upload-pages-artifact@v1 with: diff --git a/Makefile b/Makefile index f00e9cb..5ce68c3 100644 --- a/Makefile +++ b/Makefile @@ -44,3 +44,7 @@ test_consumer: export RABBITMQ_PASSWORD=$(RABBITMQ_PASSWORD) && \ export QUEUE_NAME=$(QUEUE_NAME) && \ COVERAGE_FILE=.coverage_consumer coverage run -m pytest -vx consumer/tests +coverage_report: + coverage combine .coverage_producer .coverage_consumer && \ + coverage report -m --omit="*/tests/*" +test: test_producer test_consumer coverage_report From 76778c0da3123bf4627baf1d90eabb742a3ecfe7 Mon Sep 17 00:00:00 2001 From: alexau Date: Sat, 2 Dec 2023 22:21:02 +0800 Subject: [PATCH 21/36] Updated the postgresql test case --- .env | 2 +- .github/workflows/test.yml | 27 +-- Makefile | 6 +- .../src/adapters/fetch_filenames/rabbitmq.py | 3 +- .../adapters/upsert_iot_records/postgres.py | 12 +- consumer/src/deployments/scripts/main.py | 3 +- .../test_rabbitmq/conftest.py | 30 +++ .../test_rabbitmq/test_helloworld.py | 2 - .../test_postgres/conftest.py | 52 ++++ .../test_postgres/test_close_conn_failed.py | 19 ++ .../test_close_conn_successful.py | 16 ++ .../test_postgres/test_failed_conn.py | 227 ++++++++++++++++++ .../test_postgres/test_failed_upsert.py | 195 +++++++++++++++ .../test_postgres/test_successful_upsert.py | 84 +++++++ .../test_postgres/test_upsert_idempotent.py | 91 +++++++ .../test_postgres/utils.py | 67 ++++++ .../test_rabbitmq/test_failed_conn.py | 18 +- .../test_rabbitmq/test_failed_publish.py | 16 +- .../test_publish_non_idempotent.py | 57 +++++ .../test_rabbitmq/test_successful_publish.py | 8 +- 20 files changed, 887 insertions(+), 48 deletions(-) delete mode 100644 consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_helloworld.py create mode 100644 consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_close_conn_failed.py create mode 100644 consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_close_conn_successful.py create mode 100644 consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_upsert_idempotent.py create mode 100644 consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/utils.py create mode 100644 producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_publish_non_idempotent.py diff --git a/.env b/.env index 8255943..19bf897 100644 --- a/.env +++ b/.env @@ -32,5 +32,5 @@ CONSUMER_LOG_DIR=./logs/producer CONSUMER_LOG_RETENTION=7 CONSUMER_LOG_ROTATION=midnight -CSV_PARSER_RECOGNIZED_DATETIME_FORMATS="%Y-%m-%dT%H:%M:%S.%fZ" +CSV_PARSER_RECOGNIZED_DATETIME_FORMATS="%Y-%m-%dT%H:%M:%S.%f%z" CSV_PARSER_DELIMITER="," diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ce63658..ed57614 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,12 +9,12 @@ jobs: outputs: postgres-version-tag: ${{ 
steps.load-dotenv.outputs.POSTGRES_VERSION_TAG }} postgres-port: ${{ steps.load-dotenv.outputs.POSTGRES_PORT }} - postgres-user: ${{ steps.load-dotenv.outputs.POSTGRES_USER }} + postgres-username: ${{ steps.load-dotenv.outputs.POSTGRES_USERNAME }} postgres-password: ${{ steps.load-dotenv.outputs.POSTGRES_PASSWORD }} postgres-database: ${{ steps.load-dotenv.outputs.POSTGRES_DATABASE }} rabbitmq-version-tag: ${{ steps.load-dotenv.outputs.RABBITMQ_VERSION_TAG }} rabbitmq-port: ${{ steps.load-dotenv.outputs.RABBITMQ_PORT }} - rabbitmq-user: ${{ steps.load-dotenv.outputs.RABBITMQ_USER }} + rabbitmq-username: ${{ steps.load-dotenv.outputs.RABBITMQ_USERNAME }} rabbitmq-password: ${{ steps.load-dotenv.outputs.RABBITMQ_PASSWORD }} queue-name: ${{ steps.load-dotenv.outputs.QUEUE_NAME }} steps: @@ -28,12 +28,12 @@ jobs: set +o allexport echo "POSTGRES_VERSION_TAG=$POSTGRES_VERSION_TAG" >> $GITHUB_OUTPUT echo "POSTGRES_PORT=$POSTGRES_PORT" >> $GITHUB_OUTPUT - echo "POSTGRES_USER=$POSTGRES_USER" >> $GITHUB_OUTPUT + echo "POSTGRES_USERNAME=$POSTGRES_USERNAME" >> $GITHUB_OUTPUT echo "POSTGRES_PASSWORD=$POSTGRES_PASSWORD" >> $GITHUB_OUTPUT echo "POSTGRES_DATABASE=$POSTGRES_DATABASE" >> $GITHUB_OUTPUT echo "RABBITMQ_VERSION_TAG=$RABBITMQ_VERSION_TAG" >> $GITHUB_OUTPUT echo "RABBITMQ_PORT=$RABBITMQ_PORT" >> $GITHUB_OUTPUT - echo "RABBITMQ_USER=$RABBITMQ_USER" >> $GITHUB_OUTPUT + echo "RABBITMQ_USERNAME=$RABBITMQ_USERNAME" >> $GITHUB_OUTPUT echo "RABBITMQ_PASSWORD=$RABBITMQ_PASSWORD" >> $GITHUB_OUTPUT echo "QUEUE_NAME=$QUEUE_NAME" >> $GITHUB_OUTPUT test-producer: @@ -52,7 +52,7 @@ jobs: rabbitmq: image: rabbitmq:${{ needs.load-dotenv.outputs.rabbitmq-version-tag }} env: - RABBITMQ_DEFAULT_USER: ${{ needs.load-dotenv.outputs.rabbitmq-user }} + RABBITMQ_DEFAULT_USER: ${{ needs.load-dotenv.outputs.rabbitmq-username }} RABBITMQ_DEFAULT_PASS: ${{ needs.load-dotenv.outputs.rabbitmq-password }} options: >- --health-cmd "rabbitmq-diagnostics -q check_running" @@ -86,14 +86,9 @@ jobs: run: | coverage run -m pytest -v producer/tests env: - POSTGRES_HOST: localhost - POSTGRES_PORT: ${{ needs.load-dotenv.outputs.postgres-port }} - POSTGRES_USER: ${{ needs.load-dotenv.outputs.postgres-user }} - POSTGRES_PASSWORD: ${{ needs.load-dotenv.outputs.postgres-password }} - POSTGRES_DATABASE: ${{ needs.load-dotenv.outputs.postgres-database }} RABBITMQ_HOST: localhost RABBITMQ_PORT: ${{ needs.load-dotenv.outputs.rabbitmq-port }} - RABBITMQ_USER: ${{ needs.load-dotenv.outputs.rabbitmq-user }} + RABBITMQ_USERNAME: ${{ needs.load-dotenv.outputs.rabbitmq-username }} RABBITMQ_PASSWORD: ${{ needs.load-dotenv.outputs.rabbitmq-password }} QUEUE_NAME: ${{ needs.load-dotenv.outputs.queue-name }} - name: Output coverage file @@ -117,7 +112,7 @@ jobs: rabbitmq: image: rabbitmq:${{ needs.load-dotenv.outputs.rabbitmq-version-tag }} env: - RABBITMQ_DEFAULT_USER: ${{ needs.load-dotenv.outputs.rabbitmq-user }} + RABBITMQ_DEFAULT_USER: ${{ needs.load-dotenv.outputs.rabbitmq-username }} RABBITMQ_DEFAULT_PASS: ${{ needs.load-dotenv.outputs.rabbitmq-password }} options: >- --health-cmd "rabbitmq-diagnostics -q check_running" @@ -129,9 +124,9 @@ jobs: postgres: image: postgres:${{ needs.load-dotenv.outputs.postgres-version-tag }} env: - POSTGRES_USER: ${{ needs.load-dotenv.outputs.postgres-user }} + POSTGRES_USER: ${{ needs.load-dotenv.outputs.postgres-username }} POSTGRES_PASSWORD: ${{ needs.load-dotenv.outputs.postgres-password }} - POSTGRES_DB: ${{ needs.load-dotenv.outputs.postgres-database }} + POSTGRES_DATABASE: ${{ 
needs.load-dotenv.outputs.postgres-database }} options: >- --health-cmd pg_isready --health-interval 5s @@ -164,12 +159,12 @@ jobs: env: POSTGRES_HOST: localhost POSTGRES_PORT: ${{ needs.load-dotenv.outputs.postgres-port }} - POSTGRES_USER: ${{ needs.load-dotenv.outputs.postgres-user }} + POSTGRES_USERNAME: ${{ needs.load-dotenv.outputs.postgres-username }} POSTGRES_PASSWORD: ${{ needs.load-dotenv.outputs.postgres-password }} POSTGRES_DATABASE: ${{ needs.load-dotenv.outputs.postgres-database }} RABBITMQ_HOST: localhost RABBITMQ_PORT: ${{ needs.load-dotenv.outputs.rabbitmq-port }} - RABBITMQ_USER: ${{ needs.load-dotenv.outputs.rabbitmq-user }} + RABBITMQ_USERNAME: ${{ needs.load-dotenv.outputs.rabbitmq-username }} RABBITMQ_PASSWORD: ${{ needs.load-dotenv.outputs.rabbitmq-password }} QUEUE_NAME: ${{ needs.load-dotenv.outputs.queue-name }} - name: Output coverage file diff --git a/Makefile b/Makefile index 5ce68c3..3411d53 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ test_producer: export POSTGRES_PORT=$(POSTGRES_PORT) && \ export POSTGRES_USERNAME=$(POSTGRES_USERNAME) && \ export POSTGRES_PASSWORD=$(POSTGRES_PASSWORD) && \ - export POSTGRES_DATABASE=$(POSTGRES_DB) && \ + export POSTGRES_DATABASE=$(POSTGRES_DATABASE) && \ export RABBITMQ_HOST=localhost && \ export RABBITMQ_PORT=$(RABBITMQ_PORT) && \ export RABBITMQ_USERNAME=$(RABBITMQ_USERNAME) && \ @@ -37,13 +37,13 @@ test_consumer: export POSTGRES_PORT=$(POSTGRES_PORT) && \ export POSTGRES_USERNAME=$(POSTGRES_USERNAME) && \ export POSTGRES_PASSWORD=$(POSTGRES_PASSWORD) && \ - export POSTGRES_DATABASE=$(POSTGRES_DB) && \ + export POSTGRES_DATABASE=$(POSTGRES_DATABASE) && \ export RABBITMQ_HOST=localhost && \ export RABBITMQ_PORT=$(RABBITMQ_PORT) && \ export RABBITMQ_USERNAME=$(RABBITMQ_USERNAME) && \ export RABBITMQ_PASSWORD=$(RABBITMQ_PASSWORD) && \ export QUEUE_NAME=$(QUEUE_NAME) && \ - COVERAGE_FILE=.coverage_consumer coverage run -m pytest -vx consumer/tests + COVERAGE_FILE=.coverage_consumer coverage run -m pytest -vxs consumer/tests coverage_report: coverage combine .coverage_producer .coverage_consumer && \ coverage report -m --omit="*/tests/*" diff --git a/consumer/src/adapters/fetch_filenames/rabbitmq.py b/consumer/src/adapters/fetch_filenames/rabbitmq.py index f712f21..68534e6 100644 --- a/consumer/src/adapters/fetch_filenames/rabbitmq.py +++ b/consumer/src/adapters/fetch_filenames/rabbitmq.py @@ -78,8 +78,7 @@ def close(self) -> bool: try: if self._conn is not None: self._conn.close() - return True - return False + return True except Exception as e: logging.exception(e) return False diff --git a/consumer/src/adapters/upsert_iot_records/postgres.py b/consumer/src/adapters/upsert_iot_records/postgres.py index 266b5df..0ee8605 100644 --- a/consumer/src/adapters/upsert_iot_records/postgres.py +++ b/consumer/src/adapters/upsert_iot_records/postgres.py @@ -64,7 +64,7 @@ def _get_sql_stmt(self) -> str: sensor_id, value ) VALUES ( - %(datetime)s, + %(record_time)s, %(sensor_id)s, %(value)s ) ON CONFLICT (record_time, sensor_id) DO UPDATE SET @@ -133,3 +133,13 @@ def _upsert_batch(self, iot_records: Sequence[IOTRecord]) -> list[bool]: self._reset_conn() successes.extend([False] * len(batch)) return successes + + @override + def close(self) -> bool: + try: + if self._conn is not None: + self._conn.close() + return True + except Exception as e: + logging.exception(e) + return False diff --git a/consumer/src/deployments/scripts/main.py b/consumer/src/deployments/scripts/main.py index ab51d84..9a3306c 100644 --- 
a/consumer/src/deployments/scripts/main.py +++ b/consumer/src/deployments/scripts/main.py @@ -25,8 +25,7 @@ upsert_iot_records_client = PostgresUpsertIOTRecordsClient( host=PostgresConfig.HOST, port=PostgresConfig.PORT, - username=PostgresConfig.USERNAME, - password=PostgresConfig.PASSWORD, + credentials_service=lambda: (PostgresConfig.USERNAME, PostgresConfig.PASSWORD), database=PostgresConfig.DATABASE, batch_upsert_size=PostgresConfig.BATCH_UPSERT_SIZE, ) diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/conftest.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/conftest.py index e69de29..0f6b85a 100644 --- a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/conftest.py +++ b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/conftest.py @@ -0,0 +1,30 @@ +from src.adapters.fetch_filenames.rabbitmq import RabbitMQFetchFilenamesClient +from src.deployments.scripts.config import RabbitMQConfig +import pika +import pytest +from pytest import MonkeyPatch + + +@pytest.fixture(scope="function") +def rabbitmq_fetch_filenames_client() -> RabbitMQFetchFilenamesClient: + return RabbitMQFetchFilenamesClient( + host=RabbitMQConfig.HOST, + port=RabbitMQConfig.PORT, + credentials_service=lambda: (RabbitMQConfig.USERNAME, RabbitMQConfig.PASSWORD), + queue=RabbitMQConfig.QUEUE, + polling_timeout=RabbitMQConfig.POLLING_TIMEOUT, + ) + + +@pytest.fixture(scope="function") +def raw_rabbitmq_pika_conn_config() -> tuple[pika.BaseConnection, str]: + pika_conn = pika.BlockingConnection( + pika.ConnectionParameters( + host=RabbitMQConfig.HOST, + port=RabbitMQConfig.PORT, + credentials=pika.PlainCredentials( + RabbitMQConfig.USERNAME, RabbitMQConfig.PASSWORD + ), + ) + ) + return pika_conn, RabbitMQConfig.QUEUE diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_helloworld.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_helloworld.py deleted file mode 100644 index 68c495a..0000000 --- a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_helloworld.py +++ /dev/null @@ -1,2 +0,0 @@ -def test_helloworld(): - assert True diff --git a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/conftest.py b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/conftest.py index e69de29..db892d2 100644 --- a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/conftest.py +++ b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/conftest.py @@ -0,0 +1,52 @@ +from src.adapters.upsert_iot_records.postgres import PostgresUpsertIOTRecordsClient +from src.deployments.scripts.config import PostgresConfig +import psycopg2 +import pytest + + +@pytest.fixture(scope="function") +def postgres_upsert_iot_records_client() -> PostgresUpsertIOTRecordsClient: + return PostgresUpsertIOTRecordsClient( + host=PostgresConfig.HOST, + port=PostgresConfig.PORT, + credentials_service=lambda: (PostgresConfig.USERNAME, PostgresConfig.PASSWORD), + database=PostgresConfig.DATABASE, + batch_upsert_size=PostgresConfig.BATCH_UPSERT_SIZE, + ) + + +@pytest.fixture(scope="function") +def raw_postgres_psycopg2_conn_config() -> psycopg2.extensions.connection: + with psycopg2.connect( + host=PostgresConfig.HOST, + port=PostgresConfig.PORT, + user=PostgresConfig.USERNAME, + password=PostgresConfig.PASSWORD, + database=PostgresConfig.DATABASE, + ) as conn: + yield conn + + +@pytest.fixture(scope="function", autouse=True) +def setup_teardown_postgres_tables( + 
raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, +) -> None: + with raw_postgres_psycopg2_conn_config.cursor() as cursor: + try: + cursor.execute( + """ + TRUNCATE TABLE records; + """ + ) + raw_postgres_psycopg2_conn_config.commit() + yield + except Exception as e: + raw_postgres_psycopg2_conn_config.rollback() + raise e + finally: + cursor.execute( + """ + TRUNCATE TABLE records; + """ + ) + raw_postgres_psycopg2_conn_config.commit() diff --git a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_close_conn_failed.py b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_close_conn_failed.py new file mode 100644 index 0000000..b7003b8 --- /dev/null +++ b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_close_conn_failed.py @@ -0,0 +1,19 @@ +from pytest import LogCaptureFixture +from src.adapters.upsert_iot_records.postgres import PostgresUpsertIOTRecordsClient +from .utils import random_iot_records, MockedPostgresConnection +import pytest + + +def test_close_conn_failed( + postgres_upsert_iot_records_client: PostgresUpsertIOTRecordsClient, + caplog: LogCaptureFixture, +): + postgres_upsert_iot_records_client.upsert(random_iot_records()[0]) + + assert postgres_upsert_iot_records_client._conn is not None + + postgres_upsert_iot_records_client._conn = MockedPostgresConnection() + + with caplog.at_level("ERROR"): + assert not postgres_upsert_iot_records_client.close() + assert "Failed to close!" in caplog.text diff --git a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_close_conn_successful.py b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_close_conn_successful.py new file mode 100644 index 0000000..3d0bb55 --- /dev/null +++ b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_close_conn_successful.py @@ -0,0 +1,16 @@ +from src.adapters.upsert_iot_records.postgres import PostgresUpsertIOTRecordsClient +from .utils import random_iot_records + + +def test_close_conn_successful( + postgres_upsert_iot_records_client: PostgresUpsertIOTRecordsClient, +): + postgres_upsert_iot_records_client.upsert(random_iot_records()[0]) + assert postgres_upsert_iot_records_client._conn is not None + assert postgres_upsert_iot_records_client.close() + + +def test_none_conn_close_successful( + postgres_upsert_iot_records_client: PostgresUpsertIOTRecordsClient, +): + assert postgres_upsert_iot_records_client.close() diff --git a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_conn.py b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_conn.py index e69de29..2edb0ad 100644 --- a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_conn.py +++ b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_conn.py @@ -0,0 +1,227 @@ +import pytest +from src.adapters.upsert_iot_records.postgres import PostgresUpsertIOTRecordsClient +from src.deployments.scripts.config import PostgresConfig +from src.entities import IOTRecord +import psycopg2 +from .utils import random_iot_records, MockedPostgresConnection +from pytest import MonkeyPatch + + +@pytest.mark.smoke +@pytest.mark.parametrize("iot_record", random_iot_records()) +def test_upsert_single_failed_conn( + postgres_upsert_iot_records_client: PostgresUpsertIOTRecordsClient, + raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, + iot_record: IOTRecord, + monkeypatch: MonkeyPatch, +): + def 
mocked_failed_conn( + self, + *args, + **kwargs, + ) -> None: + raise Exception("Failed to connect") + + monkeypatch.setattr(psycopg2, "connect", mocked_failed_conn) + + with pytest.raises(Exception) as e: + assert not postgres_upsert_iot_records_client.upsert(iot_record) + assert e.value == "Failed to connect" + + with raw_postgres_psycopg2_conn_config.cursor() as cursor: + cursor.execute( + """ + SELECT + record_time, + TRIM(sensor_id), + value + FROM records + WHERE record_time = %s + AND sensor_id = %s + """, + (iot_record.record_time, iot_record.sensor_id), + ) + + assert cursor.fetchone() is None + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "iot_records", + [random_iot_records() for _ in range(5)], +) +def test_upsert_batch_failed_conn( + postgres_upsert_iot_records_client: PostgresUpsertIOTRecordsClient, + raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, + iot_records: list[IOTRecord], + monkeypatch: MonkeyPatch, +): + def mocked_failed_conn( + self, + *args, + **kwargs, + ) -> None: + raise Exception("Failed to connect") + + monkeypatch.setattr(psycopg2, "connect", mocked_failed_conn) + + with pytest.raises(Exception) as e: + assert not any(postgres_upsert_iot_records_client.upsert(iot_records)) + assert e.value == "Failed to connect" + + with raw_postgres_psycopg2_conn_config.cursor() as cursor: + stmt = """ + SELECT + record_time, + TRIM(sensor_id), + value + FROM records + WHERE (record_time, sensor_id) IN ({}) + """.format( + ",".join(["%s"] * len(iot_records)) + ) + cursor.execute( + stmt, + [ + (iot_record.record_time, iot_record.sensor_id) + for iot_record in iot_records + ], + ) + + assert cursor.fetchone() is None + + +@pytest.mark.smoke +@pytest.mark.parametrize("iot_record", random_iot_records()) +def test_upsert_single_wrong_credentials( + raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, + iot_record: IOTRecord, +): + postgres_upsert_iot_records_client = PostgresUpsertIOTRecordsClient( + host=PostgresConfig.HOST, + port=PostgresConfig.PORT, + credentials_service=lambda: ("wrong", "wrong"), + database=PostgresConfig.DATABASE, + batch_upsert_size=1, + ) + + with pytest.raises(Exception) as e: + assert not postgres_upsert_iot_records_client.upsert(iot_record) + assert "ACCESS_REFUSED" in e.value and "403" in e.value + + with raw_postgres_psycopg2_conn_config.cursor() as cursor: + cursor.execute( + """ + SELECT + record_time, + TRIM(sensor_id), + value + FROM records + WHERE record_time = %s + AND sensor_id = %s + """, + (iot_record.record_time, iot_record.sensor_id), + ) + + assert cursor.fetchone() is None + + +@pytest.mark.slow +@pytest.mark.smoke +@pytest.mark.parametrize("iot_record", random_iot_records()) +def test_upsert_single_wrong_host( + raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, + iot_record: IOTRecord, +): + postgres_upsert_iot_records_client = PostgresUpsertIOTRecordsClient( + host="wrong", + port=PostgresConfig.PORT, + credentials_service=lambda: (PostgresConfig.USERNAME, PostgresConfig.PASSWORD), + database=PostgresConfig.DATABASE, + batch_upsert_size=1, + ) + + with pytest.raises(Exception) as e: + assert not postgres_upsert_iot_records_client.upsert(iot_record) + assert "ACCESS_REFUSED" in e.value and "403" in e.value + + with raw_postgres_psycopg2_conn_config.cursor() as cursor: + cursor.execute( + """ + SELECT + record_time, + TRIM(sensor_id), + value + FROM records + WHERE record_time = %s + AND sensor_id = %s + """, + (iot_record.record_time, iot_record.sensor_id), + ) + + assert 
cursor.fetchone() is None + + +@pytest.mark.slow +@pytest.mark.parametrize("iot_record", random_iot_records()) +def test_upsert_single_failed_conn_reset_conn( + postgres_upsert_iot_records_client: PostgresUpsertIOTRecordsClient, + iot_record: IOTRecord, + monkeypatch: MonkeyPatch, +): + assert postgres_upsert_iot_records_client._conn is None + assert postgres_upsert_iot_records_client.upsert(iot_record) + conn = postgres_upsert_iot_records_client._conn + assert conn is not None + + def mock_failed_conn( + self, + *args, + **kwargs, + ) -> None: + raise Exception("Failed to connect") + + monkeypatch.setattr(PostgresUpsertIOTRecordsClient, "_get_conn", mock_failed_conn) + + assert not postgres_upsert_iot_records_client.upsert(iot_record) + + monkeypatch.undo() + + assert postgres_upsert_iot_records_client._conn is None + + assert postgres_upsert_iot_records_client.upsert(iot_record) + assert postgres_upsert_iot_records_client._conn != conn + + +@pytest.mark.slow +@pytest.mark.parametrize( + "iot_records", + [random_iot_records() for _ in range(5)], +) +def test_upsert_batch_failed_conn_reset_conn( + postgres_upsert_iot_records_client: PostgresUpsertIOTRecordsClient, + iot_records: list[IOTRecord], + monkeypatch: MonkeyPatch, +): + assert postgres_upsert_iot_records_client._conn is None + assert all(postgres_upsert_iot_records_client.upsert(iot_records)) + conn = postgres_upsert_iot_records_client._conn + assert conn is not None + + def mock_failed_conn( + self, + *args, + **kwargs, + ) -> None: + raise Exception("Failed to connect") + + monkeypatch.setattr(PostgresUpsertIOTRecordsClient, "_get_conn", mock_failed_conn) + + assert not any(postgres_upsert_iot_records_client.upsert(iot_records)) + + monkeypatch.undo() + + assert postgres_upsert_iot_records_client._conn is None + + assert all(postgres_upsert_iot_records_client.upsert(iot_records)) + assert postgres_upsert_iot_records_client._conn != conn diff --git a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_upsert.py b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_upsert.py index e69de29..a5f5425 100644 --- a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_upsert.py +++ b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_upsert.py @@ -0,0 +1,195 @@ +from __future__ import annotations +from datetime import datetime +from src.adapters.upsert_iot_records.postgres import PostgresUpsertIOTRecordsClient +from .utils import random_iot_records, MockedPostgresCursor, MockedPostgresConnection +import pytest +from src.entities import IOTRecord +import psycopg2 +from pytest import MonkeyPatch + + +@pytest.mark.smoke +@pytest.mark.parametrize("iot_record", random_iot_records()) +def test_upsert_single_failed( + postgres_upsert_iot_records_client: PostgresUpsertIOTRecordsClient, + raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, + iot_record: IOTRecord, + monkeypatch: MonkeyPatch, +): + monkeypatch.setattr( + psycopg2, "connect", lambda *args, **kwargs: MockedPostgresConnection() + ) + + with pytest.raises(Exception) as e: + assert not postgres_upsert_iot_records_client.upsert(iot_record) + assert e.value == "Failed to execute!" 
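For context, the statement these failure tests exercise is the adapter's ON CONFLICT upsert. A minimal sketch of that statement as the adapter would hand it to psycopg2 follows; the hunk earlier in this series shows the conflict target but truncates the SET list, so the EXCLUDED.value assignment here is an assumption rather than the adapter's confirmed text:

UPSERT_SQL = """
    INSERT INTO records (
        record_time,
        sensor_id,
        value
    ) VALUES (
        %(record_time)s,
        %(sensor_id)s,
        %(value)s
    ) ON CONFLICT (record_time, sensor_id) DO UPDATE SET
        value = EXCLUDED.value
"""
# With MockedPostgresConnection patched in, cursor.executemany() raises before
# this statement ever reaches Postgres, so the real table checked below stays empty.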
+ + with raw_postgres_psycopg2_conn_config.cursor() as cursor: + cursor.execute( + """ + SELECT + record_time, + TRIM(sensor_id), + value + FROM records + WHERE record_time = %s + AND sensor_id = %s + """, + (iot_record.record_time, iot_record.sensor_id), + ) + + assert cursor.fetchone() is None + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "iot_records", + [random_iot_records() for _ in range(5)], +) +def test_upsert_batch_failed( + postgres_upsert_iot_records_client: PostgresUpsertIOTRecordsClient, + raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, + iot_records: list[IOTRecord], + monkeypatch: MonkeyPatch, +): + monkeypatch.setattr( + psycopg2, "connect", lambda *args, **kwargs: MockedPostgresConnection() + ) + + with pytest.raises(Exception) as e: + assert not any(postgres_upsert_iot_records_client.upsert(iot_records)) + assert e.value == "Failed to execute!" + + with raw_postgres_psycopg2_conn_config.cursor() as cursor: + stmt = """ + SELECT + record_time, + TRIM(sensor_id), + value + FROM records + WHERE (record_time, sensor_id) IN ({}) + """.format( + ",".join(["%s"] * len(iot_records)) + ) + cursor.execute( + stmt, + [ + (iot_record.record_time, iot_record.sensor_id) + for iot_record in iot_records + ], + ) + + assert cursor.fetchone() is None + + +@pytest.mark.slow +@pytest.mark.parametrize( + "iot_records", + [random_iot_records() for _ in range(5)], +) +def test_upsert_batch_partial_failed( + postgres_upsert_iot_records_client: PostgresUpsertIOTRecordsClient, + raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, + iot_records: list[IOTRecord], + monkeypatch: MonkeyPatch, +): + new_postgres_upsert_iot_records_client = PostgresUpsertIOTRecordsClient( + host=postgres_upsert_iot_records_client._host, + port=postgres_upsert_iot_records_client._port, + credentials_service=postgres_upsert_iot_records_client._credentials_service, + database=postgres_upsert_iot_records_client._database, + batch_upsert_size=1, + ) + + username, password = postgres_upsert_iot_records_client._credentials_service() + + valid_psycopg2_conn = psycopg2.connect( + host=postgres_upsert_iot_records_client._host, + port=postgres_upsert_iot_records_client._port, + user=username, + password=password, + database=postgres_upsert_iot_records_client._database, + ) + + monkeypatch.setattr( + psycopg2, "connect", lambda *args, **kwargs: MockedPostgresConnection() + ) + + counter = 0 + + def mocked_partially_failed_upsert( + self, + *args, + **kwargs, + ) -> None: + nonlocal counter + counter += 1 + if counter == 3: + raise Exception("Failed to execute!") + else: + with valid_psycopg2_conn.cursor() as cursor: + try: + cursor.executemany( + *args, + ) + valid_psycopg2_conn.commit() + except Exception as e: + valid_psycopg2_conn.rollback() + raise e + + monkeypatch.setattr( + MockedPostgresCursor, "executemany", mocked_partially_failed_upsert + ) + + with pytest.raises(Exception) as e: + upsert_successes = new_postgres_upsert_iot_records_client.upsert(iot_records) + + assert not all(upsert_successes) + assert any(upsert_successes) + assert upsert_successes[2] == False + assert e.value == "Failed to execute!" 
+ + successful_records = [ + iot_record + for iot_record, success in zip(iot_records, upsert_successes) + if success + ] + + with raw_postgres_psycopg2_conn_config.cursor() as cursor: + stmt = """ + SELECT + record_time, + TRIM(sensor_id), + value + FROM records + WHERE (record_time, sensor_id) IN ({}) + """.format( + ",".join(["%s"] * len(successful_records)) + ) + cursor.execute( + stmt, + [ + (iot_record.record_time, iot_record.sensor_id) + for iot_record in successful_records + ], + ) + + record_time_sensor_id_map: dict[tuple[datetime, str], float] = {} + + for record_time, sensor_id, value in cursor.fetchall(): + record_time_sensor_id_map[(record_time, sensor_id)] = value + + for iot_record in successful_records: + assert ( + pytest.approx( + record_time_sensor_id_map[ + (iot_record.record_time, iot_record.sensor_id) + ] + ) + == iot_record.value + ) + record_time_sensor_id_map.pop( + (iot_record.record_time, iot_record.sensor_id) + ) + + assert len(record_time_sensor_id_map) == 0 diff --git a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_successful_upsert.py b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_successful_upsert.py index 1d443f1..7180dad 100644 --- a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_successful_upsert.py +++ b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_successful_upsert.py @@ -1 +1,85 @@ +from datetime import datetime from src.adapters.upsert_iot_records.postgres import PostgresUpsertIOTRecordsClient +from .utils import random_iot_records +import pytest +from src.entities import IOTRecord +import psycopg2 + + +@pytest.mark.smoke +@pytest.mark.parametrize("iot_record", random_iot_records()) +def test_upsert_single_iot_record( + postgres_upsert_iot_records_client: PostgresUpsertIOTRecordsClient, + raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, + iot_record: IOTRecord, +): + assert postgres_upsert_iot_records_client.upsert(iot_record) + + with raw_postgres_psycopg2_conn_config.cursor() as cursor: + cursor.execute( + """ + SELECT + record_time, + TRIM(sensor_id), + value + FROM records + WHERE record_time = %s + AND sensor_id = %s + """, + (iot_record.record_time, iot_record.sensor_id), + ) + + record_time, sensor_id, value = cursor.fetchone() + + assert record_time == iot_record.record_time + assert sensor_id == iot_record.sensor_id + assert pytest.approx(value) == iot_record.value + + +@pytest.mark.smoke +@pytest.mark.parametrize("iot_records", [random_iot_records() for _ in range(5)]) +def test_upsert_batch_iot_records( + postgres_upsert_iot_records_client: PostgresUpsertIOTRecordsClient, + raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, + iot_records: list[IOTRecord], +): + assert all(postgres_upsert_iot_records_client.upsert(iot_records)) + + with raw_postgres_psycopg2_conn_config.cursor() as cursor: + stmt = """ + SELECT + record_time, + TRIM(sensor_id), + value + FROM records + WHERE (record_time, sensor_id) IN ({}) + """.format( + ",".join(["%s"] * len(iot_records)) + ) + cursor.execute( + stmt, + [ + (iot_record.record_time, iot_record.sensor_id) + for iot_record in iot_records + ], + ) + + record_time_sensor_id_map: dict[tuple[datetime, str], float] = {} + + for record_time, sensor_id, value in cursor.fetchall(): + record_time_sensor_id_map[(record_time, sensor_id)] = value + + for iot_record in iot_records: + assert ( + pytest.approx( + record_time_sensor_id_map[ + (iot_record.record_time, iot_record.sensor_id) 
+ ] + ) + == iot_record.value + ) + record_time_sensor_id_map.pop( + (iot_record.record_time, iot_record.sensor_id) + ) + + assert len(record_time_sensor_id_map) == 0 diff --git a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_upsert_idempotent.py b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_upsert_idempotent.py new file mode 100644 index 0000000..c5e87ea --- /dev/null +++ b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_upsert_idempotent.py @@ -0,0 +1,91 @@ +from datetime import datetime +from src.adapters.upsert_iot_records.postgres import PostgresUpsertIOTRecordsClient +from .utils import random_iot_records +import pytest +from src.entities import IOTRecord +import psycopg2 + + +@pytest.mark.smoke +@pytest.mark.parametrize("iot_record", random_iot_records()) +def test_upsert_single_iot_record_idempotent( + postgres_upsert_iot_records_client: PostgresUpsertIOTRecordsClient, + raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, + iot_record: IOTRecord, +): + assert postgres_upsert_iot_records_client.upsert(iot_record) + assert postgres_upsert_iot_records_client.upsert(iot_record) + + with raw_postgres_psycopg2_conn_config.cursor() as cursor: + cursor.execute( + """ + SELECT + record_time, + TRIM(sensor_id), + value + FROM records + WHERE record_time = %s + AND sensor_id = %s + """, + (iot_record.record_time, iot_record.sensor_id), + ) + + results = cursor.fetchall() + for record_time, sensor_id, value in results: + assert record_time == iot_record.record_time + assert sensor_id == iot_record.sensor_id + assert pytest.approx(value) == iot_record.value + + assert len(results) == 1 + + +@pytest.mark.smoke +@pytest.mark.parametrize("iot_records", [random_iot_records() for _ in range(5)]) +def test_upsert_batch_iot_records_idempotent( + postgres_upsert_iot_records_client: PostgresUpsertIOTRecordsClient, + raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, + iot_records: list[IOTRecord], +): + assert all(postgres_upsert_iot_records_client.upsert(iot_records)) + assert all(postgres_upsert_iot_records_client.upsert(iot_records)) + + with raw_postgres_psycopg2_conn_config.cursor() as cursor: + stmt = """ + SELECT + record_time, + TRIM(sensor_id), + value + FROM records + WHERE (record_time, sensor_id) IN ({}) + """.format( + ",".join(["%s"] * len(iot_records)) + ) + cursor.execute( + stmt, + [ + (iot_record.record_time, iot_record.sensor_id) + for iot_record in iot_records + ], + ) + + record_time_sensor_id_map: dict[tuple[datetime, str], float] = {} + + for record_time, sensor_id, value in cursor.fetchall(): + if (record_time, sensor_id) in record_time_sensor_id_map: + assert False + record_time_sensor_id_map[(record_time, sensor_id)] = value + + for iot_record in iot_records: + assert ( + pytest.approx( + record_time_sensor_id_map[ + (iot_record.record_time, iot_record.sensor_id) + ] + ) + == iot_record.value + ) + record_time_sensor_id_map.pop( + (iot_record.record_time, iot_record.sensor_id) + ) + + assert len(record_time_sensor_id_map) == 0 diff --git a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/utils.py b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/utils.py new file mode 100644 index 0000000..fcb0e9a --- /dev/null +++ b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/utils.py @@ -0,0 +1,67 @@ +from __future__ import annotations +import string +from decimal import Decimal +import random +from datetime import datetime, 
timedelta +from zoneinfo import ZoneInfo +import zoneinfo +from src.entities.iot_record import IOTRecord + + +def random_iot_records() -> list[IOTRecord]: + all_iot_records = [] + all_available_timezones = list(zoneinfo.available_timezones()) + for _ in range(5): + random_timezone = random.choice(all_available_timezones) + random_time_delta = timedelta( + hours=random.randint(0, 24), + minutes=random.randint(0, 60), + seconds=random.randint(0, 60), + ) + all_iot_records.append( + IOTRecord( + record_time=datetime.now(tz=ZoneInfo(random_timezone)) + - random_time_delta, + sensor_id="".join(random.choices(string.ascii_letters, k=10)), + value=Decimal(random.random() * 100), + ) + ) + return all_iot_records + + +class MockedPostgresCursor: + def __enter__(self, *args, **kwargs) -> MockedPostgresCursor: + return self + + def __exit__(self, *args, **kwargs) -> None: + pass + + def execute(self, *args, **kwargs) -> None: + raise Exception("Failed to execute!") + + def executemany(self, *args, **kwargs) -> None: + raise Exception("Failed to execute!") + + +class MockedPostgresConnection: + def cursor(self) -> MockedPostgresCursor: + return MockedPostgresCursor() + + def commit(self) -> None: + pass + + def rollback(self) -> None: + pass + + def __enter__(self, *args, **kwargs) -> MockedPostgresConnection: + return self + + def __exit__(self, *args, **kwargs) -> None: + pass + + @property + def closed(self) -> bool: + return False + + def close(self) -> None: + raise Exception("Failed to close!") diff --git a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_conn.py b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_conn.py index 7f161a4..7e82051 100644 --- a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_conn.py +++ b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_conn.py @@ -37,13 +37,13 @@ def mocked_failed_conn( @pytest.mark.smoke @pytest.mark.parametrize( - "random_filenames", + "filenames", [random_filenames() for _ in range(5)], ) def test_publish_batch_failed_conn( rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], - random_filenames: list[str], + filenames: list[str], monkeypatch: MonkeyPatch, ): def mocked_failed_conn( @@ -56,13 +56,13 @@ def mocked_failed_conn( monkeypatch.setattr(pika.BlockingConnection, "__init__", mocked_failed_conn) with pytest.raises(Exception) as e: - assert not any(rabbitmq_publish_filenames_client.publish(random_filenames)) + assert not any(rabbitmq_publish_filenames_client.publish(filenames)) assert e.value == "Failed to connect" pika_conn, queue = raw_rabbitmq_pika_conn_config channel = pika_conn.channel() - for _ in random_filenames: + for _ in filenames: method_frame, _, body = channel.basic_get(queue=queue) assert method_frame is None assert body is None @@ -148,15 +148,15 @@ def mock_failed_basic_publish( @pytest.mark.slow @pytest.mark.parametrize( - "random_filenames", + "filenames", [random_filenames() for _ in range(5)], ) def test_publish_batch_failed_conn_reset_conn( rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, - random_filenames: list[str], + filenames: list[str], monkeypatch: MonkeyPatch, ): - assert all(rabbitmq_publish_filenames_client.publish(random_filenames)) + assert all(rabbitmq_publish_filenames_client.publish(filenames)) conn = rabbitmq_publish_filenames_client._conn def mock_failed_basic_publish( @@ -170,9 +170,9 @@ def 
mock_failed_basic_publish( pika.channel.Channel, "basic_publish", mock_failed_basic_publish ) - assert not any(rabbitmq_publish_filenames_client.publish(random_filenames)) + assert not any(rabbitmq_publish_filenames_client.publish(filenames)) monkeypatch.undo() - assert rabbitmq_publish_filenames_client.publish(random_filenames) + assert rabbitmq_publish_filenames_client.publish(filenames) assert rabbitmq_publish_filenames_client._conn != conn diff --git a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_publish.py b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_publish.py index 485292a..0750723 100644 --- a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_publish.py +++ b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_publish.py @@ -29,35 +29,35 @@ def test_publish_single_failed( @pytest.mark.smoke @pytest.mark.usefixtures("patch_failed_publish") @pytest.mark.parametrize( - "random_filenames", + "filenames", [random_filenames() for _ in range(5)], ) def test_publish_batch_failed( rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], - random_filenames: list[str], + filenames: list[str], ): with pytest.raises(Exception) as e: - assert not any(rabbitmq_publish_filenames_client.publish(random_filenames)) + assert not any(rabbitmq_publish_filenames_client.publish(filenames)) assert e.value == "Failed to publish" pika_conn, queue = raw_rabbitmq_pika_conn_config channel = pika_conn.channel() - for _ in random_filenames: + for _ in filenames: method_frame, _, body = channel.basic_get(queue=queue) assert method_frame is None assert body is None @pytest.mark.parametrize( - "random_filenames", + "filenames", [random_filenames() for _ in range(5)], ) def test_publish_batch_partial_failed( rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], - random_filenames: list[str], + filenames: list[str], monkeypatch: MonkeyPatch, ): counter = 0 @@ -95,11 +95,11 @@ def mocked_partially_failed_basic_publish( ) with pytest.raises(Exception) as e: - publish_successes = rabbitmq_publish_filenames_client.publish(random_filenames) + publish_successes = rabbitmq_publish_filenames_client.publish(filenames) successes_filenames = [ filename - for filename, success in zip(random_filenames, publish_successes) + for filename, success in zip(filenames, publish_successes) if success ] assert not all(publish_successes) diff --git a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_publish_non_idempotent.py b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_publish_non_idempotent.py new file mode 100644 index 0000000..2540f1f --- /dev/null +++ b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_publish_non_idempotent.py @@ -0,0 +1,57 @@ +import pytest +from .utils import random_filenames +from src.adapters.publish_filenames.rabbitmq import RabbitMQPublishFilenamesClient +import pika +import pytest + + +@pytest.mark.smoke +@pytest.mark.parametrize("filename", random_filenames()) +def test_publish_single_non_idempotent( + rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + filename: str, +): + assert rabbitmq_publish_filenames_client.publish(filename) + + pika_conn, queue = raw_rabbitmq_pika_conn_config + + 
channel = pika_conn.channel() + method_frame, _, body = channel.basic_get(queue=queue) + assert method_frame is not None + assert body.decode() == filename + channel.basic_ack(method_frame.delivery_tag) + + assert rabbitmq_publish_filenames_client.publish(filename) + assert rabbitmq_publish_filenames_client.publish(filename) + + method_frame, _, body = channel.basic_get(queue=queue) + assert method_frame is not None + assert body.decode() == filename + channel.basic_ack(method_frame.delivery_tag) + + method_frame, _, body = channel.basic_get(queue=queue) + assert method_frame is None + assert body is None + + +@pytest.mark.smoke +@pytest.mark.parametrize("filenames", [random_filenames() for _ in range(5)]) +def test_publish_batch_non_idempotent( + rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + filenames: list[str], +): + assert all(rabbitmq_publish_filenames_client.publish(filenames)) + assert all(rabbitmq_publish_filenames_client.publish(filenames)) + + pika_conn, queue = raw_rabbitmq_pika_conn_config + filenames_counter = {filename: 0 for filename in filenames} + channel = pika_conn.channel() + method_frame, _, body = channel.basic_get(queue=queue) + while method_frame is not None: + filenames_counter[body.decode()] += 1 + channel.basic_ack(method_frame.delivery_tag) + method_frame, _, body = channel.basic_get(queue=queue) + + assert all(count == 2 for count in filenames_counter.values()) diff --git a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_successful_publish.py b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_successful_publish.py index db8aa79..a67841b 100644 --- a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_successful_publish.py +++ b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_successful_publish.py @@ -25,20 +25,20 @@ def test_publish_single_success( @pytest.mark.smoke @pytest.mark.parametrize( - "random_filenames", + "filenames", [random_filenames() for _ in range(5)], ) def test_publish_batch_success( rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], - random_filenames: list[str], + filenames: list[str], ): - assert all(rabbitmq_publish_filenames_client.publish(random_filenames)) + assert all(rabbitmq_publish_filenames_client.publish(filenames)) pika_conn, queue = raw_rabbitmq_pika_conn_config channel = pika_conn.channel() - for filename in random_filenames: + for filename in filenames: method_frame, _, body = channel.basic_get(queue=queue) assert method_frame is not None assert body.decode() == filename From c2ccbf4f675d64710151f8eda8a60e874236c3d9 Mon Sep 17 00:00:00 2001 From: alexau Date: Sat, 2 Dec 2023 22:32:00 +0800 Subject: [PATCH 22/36] Updated the cicd --- .github/workflows/test.yml | 2 ++ .../test_postgres/test_close_conn_failed.py | 1 - .../test_rabbitmq/test_publish_non_idempotent.py | 5 +++-- .../test_rabbitmq/test_successful_publish.py | 8 ++++++-- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ed57614..c858141 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -132,6 +132,8 @@ jobs: --health-interval 5s --health-timeout 30s --health-retries 3 + ports: + - ${{ needs.load-dotenv.outputs.postgres-port }}:5432 steps: - name: Checkout uses: actions/checkout@v4 diff --git 
a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_close_conn_failed.py b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_close_conn_failed.py index b7003b8..b5c503a 100644 --- a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_close_conn_failed.py +++ b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_close_conn_failed.py @@ -1,7 +1,6 @@ from pytest import LogCaptureFixture from src.adapters.upsert_iot_records.postgres import PostgresUpsertIOTRecordsClient from .utils import random_iot_records, MockedPostgresConnection -import pytest def test_close_conn_failed( diff --git a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_publish_non_idempotent.py b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_publish_non_idempotent.py index 2540f1f..3e1c323 100644 --- a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_publish_non_idempotent.py +++ b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_publish_non_idempotent.py @@ -31,8 +31,9 @@ def test_publish_single_non_idempotent( channel.basic_ack(method_frame.delivery_tag) method_frame, _, body = channel.basic_get(queue=queue) - assert method_frame is None - assert body is None + assert method_frame is not None + assert body.decode() == filename + channel.basic_ack(method_frame.delivery_tag) @pytest.mark.smoke diff --git a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_successful_publish.py b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_successful_publish.py index a67841b..4dba1f4 100644 --- a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_successful_publish.py +++ b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_successful_publish.py @@ -37,9 +37,13 @@ def test_publish_batch_success( pika_conn, queue = raw_rabbitmq_pika_conn_config + all_filenames = [] + channel = pika_conn.channel() - for filename in filenames: + while len(all_filenames) < len(filenames): method_frame, _, body = channel.basic_get(queue=queue) assert method_frame is not None - assert body.decode() == filename + all_filenames.append(body.decode()) channel.basic_ack(method_frame.delivery_tag) + + assert sorted(all_filenames) == sorted(filenames) From c446ac2c42cace36bb483d2be53ca962773129b0 Mon Sep 17 00:00:00 2001 From: alexau Date: Sat, 2 Dec 2023 22:38:05 +0800 Subject: [PATCH 23/36] Init database --- .github/workflows/test.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c858141..ed9224f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -154,6 +154,14 @@ jobs: if: steps.cache.outputs.cache-hit != 'true' working-directory: ${{env.WORKDIR}} run: pip install -r requirements-dev.txt + - name: Setup database + if: steps.cache.outputs.cache-hit != 'true' + run: | + PGPASSWORD=${{ needs.load-dotenv.outputs.postgres-password }} \ + psql -h localhost \ + -p ${{ needs.load-dotenv.outputs.postgres-port }} \ + -U ${{ needs.load-dotenv.outputs.postgres-username }} \ + -tc "$(cat database/assets/create_records_table.sql)" - name: Run tests if: steps.cache.outputs.cache-hit != 'true' run: | From e31dee1749fc790ad69f15d656c31635981bd451 Mon Sep 17 00:00:00 2001 From: alexau Date: Sat, 2 Dec 2023 22:45:36 +0800 Subject: [PATCH 24/36] Added the init db test script --- .github/workflows/test.yml | 8 ------ 
.../test_postgres/conftest.py | 26 +++++++++++++++++++ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ed9224f..c858141 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -154,14 +154,6 @@ jobs: if: steps.cache.outputs.cache-hit != 'true' working-directory: ${{env.WORKDIR}} run: pip install -r requirements-dev.txt - - name: Setup database - if: steps.cache.outputs.cache-hit != 'true' - run: | - PGPASSWORD=${{ needs.load-dotenv.outputs.postgres-password }} \ - psql -h localhost \ - -p ${{ needs.load-dotenv.outputs.postgres-port }} \ - -U ${{ needs.load-dotenv.outputs.postgres-username }} \ - -tc "$(cat database/assets/create_records_table.sql)" - name: Run tests if: steps.cache.outputs.cache-hit != 'true' run: | diff --git a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/conftest.py b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/conftest.py index db892d2..6e1d551 100644 --- a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/conftest.py +++ b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/conftest.py @@ -4,6 +4,32 @@ import pytest +@pytest.fixture(scope="session", autouse=True) +def init_postgres_tables() -> None: + with psycopg2.connect( + host=PostgresConfig.HOST, + port=PostgresConfig.PORT, + user=PostgresConfig.USERNAME, + password=PostgresConfig.PASSWORD, + database=PostgresConfig.DATABASE, + ) as conn: + with conn.cursor() as cursor: + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS records ( + record_time TIMESTAMPTZ NOT NULL, + sensor_id CHAR(64) NOT NULL, + value DOUBLE PRECISION NOT NULL, + PRIMARY KEY(record_time, sensor_id) + ); + + CREATE INDEX IF NOT EXISTS idx_records_record_time ON records USING BTREE (record_time); + CREATE INDEX IF NOT EXISTS idx_records_sensor_id ON records USING BTREE (sensor_id); + """ + ) + conn.commit() + + @pytest.fixture(scope="function") def postgres_upsert_iot_records_client() -> PostgresUpsertIOTRecordsClient: return PostgresUpsertIOTRecordsClient( From a3f20fa635b3552a425c89da25788533b9ac98d7 Mon Sep 17 00:00:00 2001 From: alexau Date: Sat, 2 Dec 2023 22:47:40 +0800 Subject: [PATCH 25/36] Updated the env parameter --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c858141..e008ca8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -126,7 +126,7 @@ jobs: env: POSTGRES_USER: ${{ needs.load-dotenv.outputs.postgres-username }} POSTGRES_PASSWORD: ${{ needs.load-dotenv.outputs.postgres-password }} - POSTGRES_DATABASE: ${{ needs.load-dotenv.outputs.postgres-database }} + POSTGRES_DB: ${{ needs.load-dotenv.outputs.postgres-database }} options: >- --health-cmd pg_isready --health-interval 5s From cead93e0713b3ccd66f31535269225ea931cd513 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 2 Dec 2023 14:49:04 +0000 Subject: [PATCH 26/36] Updated coverage.svg --- coverage.svg | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/coverage.svg b/coverage.svg index b3e8ba0..a8c7e72 100644 --- a/coverage.svg +++ b/coverage.svg @@ -9,13 +9,13 @@ - + coverage coverage - 88% - 88% + 92% + 92% From 6155219882e986f2e763016976822aabe62fb5ba Mon Sep 17 00:00:00 2001 From: alexau Date: Sat, 2 Dec 2023 23:10:58 +0800 Subject: [PATCH 27/36] updated the report command --- .github/workflows/test.yml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e008ca8..51fb722 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -210,7 +210,7 @@ jobs: coverage combine ${{ needs.test-producer.outputs.coverage-file-cache-path }} ${{ needs.test-consumer.outputs.coverage-file-cache-path }} - name: Generate coverage report run: | - coverage report -m + coverage report --omit="*/tests/*" -m coverage html --omit="*/tests/*" - name: upload artifact uses: actions/upload-pages-artifact@v1 From 53e4a08e3e0bb940b41a2bfef82cc8f383bacb8c Mon Sep 17 00:00:00 2001 From: alexau Date: Sun, 3 Dec 2023 02:10:02 +0800 Subject: [PATCH 28/36] Added tests for rabbitmq --- .coverage_consumer | Bin 0 -> 53248 bytes .env | 1 + Makefile | 2 +- .../src/adapters/fetch_filenames/rabbitmq.py | 51 ++++--- .../{scripts => script}/__init__.py | 0 .../deployments/{scripts => script}/config.py | 2 +- .../deployments/{scripts => script}/main.py | 0 .../{scripts => script}/setup_logging.py | 0 consumer/src/usecases/fetch_filenames.py | 4 +- .../test_rabbitmq/conftest.py | 28 +++- .../test_rabbitmq/test_close_conn_failed.py | 44 ++++++ .../test_close_conn_successful.py | 16 ++ .../test_rabbitmq/test_failed_conn.py | 142 ++++++++++++++++++ .../test_rabbitmq/test_failed_fetch.py | 98 ++++++++++++ .../test_rabbitmq/test_poll_until_timeout.py | 46 ++++++ .../test_rabbitmq/test_successful_fetch.py | 60 ++++++++ .../test_rabbitmq/utils.py | 8 + .../test_postgres/conftest.py | 2 +- .../test_postgres/test_failed_conn.py | 14 +- .../test_postgres/test_failed_upsert.py | 9 +- .../test_publish_filenames/conftest.py | 0 .../test_rabbitmq/test_failed_conn.py | 22 +-- .../test_rabbitmq/test_failed_publish.py | 17 ++- .../test_main/test_main_function_failed.py | 9 +- 24 files changed, 512 insertions(+), 63 deletions(-) create mode 100644 .coverage_consumer rename consumer/src/deployments/{scripts => script}/__init__.py (100%) rename consumer/src/deployments/{scripts => script}/config.py (99%) rename consumer/src/deployments/{scripts => script}/main.py (100%) rename consumer/src/deployments/{scripts => script}/setup_logging.py (100%) create mode 100644 consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_close_conn_failed.py create mode 100644 consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_close_conn_successful.py create mode 100644 consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_failed_conn.py create mode 100644 consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_failed_fetch.py create mode 100644 consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_poll_until_timeout.py create mode 100644 consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_successful_fetch.py create mode 100644 consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/utils.py delete mode 100644 producer/tests/test_adapters/test_publish_filenames/conftest.py diff --git a/.coverage_consumer b/.coverage_consumer new file mode 100644 index 0000000000000000000000000000000000000000..ee56acd7969777dee86fa47e40f84fd9ee7436ae GIT binary patch literal 53248 zcmeI4UyKvS8Nj{vde>{OJ$L8x`3$1Bs!ER&$6YU!R1hij+FTOUNP$2?3V~&9ukSXs zy=Hd}cOil9B@$}>2&peAQI+(qeQ1-HwlA%ehpMX7L`YGUs_6q2eSoxuL}{QXg{J*x z)@!eiI~w`GkmFm~{`SwzH{bW0Z)SGZ9`D@okm2Y`$+Rk(qihWIgg7qrbwvqsva zt}~xBs{pS=n>TQx<;u1aC@Pp%ky@X(v}$2Sw?`GN>0!YHJ7*owTg$ABJEl@Ls?5!* zZ8(NmRrFcCPW;~dRj7P*0zTyDdShaCXox$0 
zkOd0Gqvj3AZth5JLDM|FMCj1#OUDguCG9~%TT!{Jm?hR^Xl});9Ej#%n;G|EXclc~ zirGjAmfq56qsD>_O1^C7+hdJIjIBchfxf^`R;pJEw5izChk|)Xx3p<}tZ~)r7H)MQ zLFi0KLUw9ssnBVLfI89Kjk`HP%&uO|%}Xqd8nx9BVp|CLHMvMP>R+npnq9a2j^uis zq@SbJOZsi3aRuDjAPU*Z)l0e4NT*ITc4Dr`i`h(ubG;Tp`z890wmCu1w3@(zf#9>7 zU(=i!k3=5hp^ugW;pwNUx4H9d4v+l6?rH3p$>Dbe<*Hh3s_Sr+K5tba9_jc2^4H#DG zuw`%Y&nm++noWBldsxYvW?9#&-q-*+w%JgoVJpmrYe%K6Egkk8)iHf&nAzkHF0`B9 z1&z*cU$9fDXw_n4yrRRfrxp5s+Bj)BUMB(>m0@U@h8o^I#aLG zCGq31$gZq9y6w=Vyf!4T#_5sGK2=z7SaV?Eop)5yox+Stm%>$HA!FmDrR8C%ZRMbs z#I6@$$-7;umsOS;7U8+lAXt*^wk0*QTvn-*;2tdHgC&#HmPi(CNp#zmk_OC*Eh6Co zutK^;RzN*hDGE6QMy%ngQDZN&c{^hb+fn_;ZhfW zsZLF8j$sRjFMs3$n-9cpN(liUwu{T5()1yq@c{ueb~M1#-XfsrsVFF#>jZCycM8z^ zmuOZ1{}kWlvIBmXBT)9fo9cI4pSX~CDgFZaCE1tyFl8q%$1aH%#2MkQ!ei05V^7~6H@#>D z5C=14?_AWpHy6x#;|#?Bv~%jtyRX{-4^xyK9%PGe97z z^6q5E8k+=`od3&@^X|qQp#Pfre_~lEnuZ5Z#5LZX>e$rg-1$25f9d9w5bz01`j~NB{{S0VIF~kN^@u0!RP}Ab}M~AQDMRZ2f=gT!?%^J|cf3XUHGOTjV$7 zSL7$;N91MlB6*G+AqR;;H1a5UkZdKJ$SM*|U4|^UApsj<=%<(ZKf3$$8|O}Z z@$|)TZU9bxb>Qg!pT76OaOsa%zIXD{yXTKzexTPRu?$@5%iLAr#D3W8=|3Auq+w?; zeR}uDh2L(8_rbwH-#=g7dhA}I7xq)VoX|sM4;LdK`^c%2IWYx$J*iWokffKAVMzwr zFP}O&ErIlT64m6Nv;9 z@i-Hb#6&E{geWp0utx%1G|G;6o{30=iEx;Q2LSB*|4`4JTwUlp5 None: self._conn = None @@ -42,36 +45,44 @@ def _get_amqp_conn(self) -> Iterator[pika.BaseConnection]: self._conn = pika.BlockingConnection(conn_parameters) yield self._conn + def _wait(self) -> None: + time.sleep(0.5) + @override - def fetch(self) -> Iterator[str]: + def fetch(self) -> Generator[str, None, None]: while True: try: + method: Optional[Basic.Deliver] = None with self._get_amqp_conn() as connection: channel: BlockingChannel = connection.channel() channel.queue_declare(queue=self._queue, durable=True) - - method: Optional[Basic.Deliver] properties: Optional[BasicProperties] body: Optional[bytes] - for method, properties, body in channel.consume( - queue=self._queue, inactivity_timeout=self._polling_timeout - ): - if method == None and properties == None and body == None: - raise StopIteration - try: - yield body.decode("utf-8") - channel.basic_ack(delivery_tag=method.delivery_tag) - except Exception as e: - logging.exception(e) - channel.basic_nack(delivery_tag=method.delivery_tag) - raise e - except StopIteration: - logging.info("No more filenames to fetch") - break + + method, properties, body = channel.basic_get( + queue=self._queue, auto_ack=False + ) + + if method is None and properties is None and body is None: + if self._last_poll_time is None: + self._last_poll_time = datetime.now() + if ( + datetime.now() - self._last_poll_time + ).total_seconds() > self._polling_timeout: + break + self._wait() + continue + + self._last_poll_time = None + + yield body.decode() + + channel.basic_ack(delivery_tag=method.delivery_tag) except Exception as e: logging.exception(e) + if method is not None: + channel.basic_reject(delivery_tag=method.delivery_tag, requeue=True) self._reset_conn() - raise e @override def close(self) -> bool: diff --git a/consumer/src/deployments/scripts/__init__.py b/consumer/src/deployments/script/__init__.py similarity index 100% rename from consumer/src/deployments/scripts/__init__.py rename to consumer/src/deployments/script/__init__.py diff --git a/consumer/src/deployments/scripts/config.py b/consumer/src/deployments/script/config.py similarity index 99% rename from consumer/src/deployments/scripts/config.py rename to consumer/src/deployments/script/config.py index 071018d..a7ed314 100644 --- a/consumer/src/deployments/scripts/config.py +++ 
b/consumer/src/deployments/script/config.py @@ -18,7 +18,7 @@ class RabbitMQConfig: USERNAME = os.getenv("RABBITMQ_USERNAME", "guest") PASSWORD = os.getenv("RABBITMQ_PASSWORD", "guest") QUEUE = os.getenv("RABBITMQ_QUEUE", "filenames") - POLLING_TIMEOUT = int(os.getenv("RABBITMQ_POLLING_TIMEOUT", 600)) + POLLING_TIMEOUT = int(os.getenv("RABBITMQ_POLLING_TIMEOUT", 10)) class PostgresConfig: diff --git a/consumer/src/deployments/scripts/main.py b/consumer/src/deployments/script/main.py similarity index 100% rename from consumer/src/deployments/scripts/main.py rename to consumer/src/deployments/script/main.py diff --git a/consumer/src/deployments/scripts/setup_logging.py b/consumer/src/deployments/script/setup_logging.py similarity index 100% rename from consumer/src/deployments/scripts/setup_logging.py rename to consumer/src/deployments/script/setup_logging.py diff --git a/consumer/src/usecases/fetch_filenames.py b/consumer/src/usecases/fetch_filenames.py index 39f0594..c63f791 100644 --- a/consumer/src/usecases/fetch_filenames.py +++ b/consumer/src/usecases/fetch_filenames.py @@ -1,10 +1,10 @@ from abc import ABC, abstractmethod -from typing import Iterator +from typing import Generator class FetchFilenameClient(ABC): @abstractmethod - def fetch(self) -> Iterator[str]: + def fetch(self) -> Generator[str, None, None]: ... @abstractmethod diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/conftest.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/conftest.py index 0f6b85a..7ca45ac 100644 --- a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/conftest.py +++ b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/conftest.py @@ -1,12 +1,12 @@ from src.adapters.fetch_filenames.rabbitmq import RabbitMQFetchFilenamesClient -from src.deployments.scripts.config import RabbitMQConfig +from src.deployments.script.config import RabbitMQConfig import pika import pytest from pytest import MonkeyPatch @pytest.fixture(scope="function") -def rabbitmq_fetch_filenames_client() -> RabbitMQFetchFilenamesClient: +def rabbitmq_fetch_filenames_client() -> RabbitMQConfig: return RabbitMQFetchFilenamesClient( host=RabbitMQConfig.HOST, port=RabbitMQConfig.PORT, @@ -16,6 +16,17 @@ def rabbitmq_fetch_filenames_client() -> RabbitMQFetchFilenamesClient: ) +@pytest.fixture(scope="function") +def rabbitmq_fetch_filenames_no_wait_client() -> RabbitMQConfig: + return RabbitMQFetchFilenamesClient( + host=RabbitMQConfig.HOST, + port=RabbitMQConfig.PORT, + credentials_service=lambda: (RabbitMQConfig.USERNAME, RabbitMQConfig.PASSWORD), + queue=RabbitMQConfig.QUEUE, + polling_timeout=0, + ) + + @pytest.fixture(scope="function") def raw_rabbitmq_pika_conn_config() -> tuple[pika.BaseConnection, str]: pika_conn = pika.BlockingConnection( @@ -28,3 +39,16 @@ def raw_rabbitmq_pika_conn_config() -> tuple[pika.BaseConnection, str]: ) ) return pika_conn, RabbitMQConfig.QUEUE + + +@pytest.fixture(scope="function", autouse=True) +def setup_teardown_rabbitmq_queue( + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], +) -> None: + pika_conn, queue = raw_rabbitmq_pika_conn_config + + channel = pika_conn.channel() + channel.queue_declare(queue=queue, durable=True) + channel.queue_purge(queue=queue) + yield + channel.queue_purge(queue=queue) diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_close_conn_failed.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_close_conn_failed.py new file mode 100644 index 
0000000..ee46fa5 --- /dev/null +++ b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_close_conn_failed.py @@ -0,0 +1,44 @@ +from pytest import MonkeyPatch, LogCaptureFixture +import pika +from src.adapters.fetch_filenames.rabbitmq import RabbitMQFetchFilenamesClient +from .utils import random_csv_filenames + + +def test_close_conn_failed( + rabbitmq_fetch_filenames_no_wait_client: RabbitMQFetchFilenamesClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + monkeypatch: MonkeyPatch, + caplog: LogCaptureFixture, +): + conn, _ = raw_rabbitmq_pika_conn_config + + channel = conn.channel() + + channel.queue_declare( + queue=rabbitmq_fetch_filenames_no_wait_client._queue, durable=True + ) + + channel.basic_publish( + exchange="", + routing_key=rabbitmq_fetch_filenames_no_wait_client._queue, + body=random_csv_filenames()[0], + properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), + ) + + for filename in rabbitmq_fetch_filenames_no_wait_client.fetch(): + assert filename is not None + + assert rabbitmq_fetch_filenames_no_wait_client._conn is not None + + def mock_failed_close( + self, + *args, + **kwargs, + ) -> None: + raise Exception("Failed to close!") + + monkeypatch.setattr(pika.BlockingConnection, "close", mock_failed_close) + + with caplog.at_level("ERROR"): + assert not rabbitmq_fetch_filenames_no_wait_client.close() + assert "Failed to close!" in caplog.text diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_close_conn_successful.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_close_conn_successful.py new file mode 100644 index 0000000..756d329 --- /dev/null +++ b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_close_conn_successful.py @@ -0,0 +1,16 @@ +from src.adapters.fetch_filenames.rabbitmq import RabbitMQFetchFilenamesClient + + +def test_close_conn_successful( + rabbitmq_fetch_filenames_no_wait_client: RabbitMQFetchFilenamesClient, +): + for _ in rabbitmq_fetch_filenames_no_wait_client.fetch(): + pass + assert rabbitmq_fetch_filenames_no_wait_client._conn is not None + assert rabbitmq_fetch_filenames_no_wait_client.close() + + +def test_none_conn_close_successful( + rabbitmq_fetch_filenames_client: RabbitMQFetchFilenamesClient, +): + assert rabbitmq_fetch_filenames_client.close() diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_failed_conn.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_failed_conn.py new file mode 100644 index 0000000..e4d9e2c --- /dev/null +++ b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_failed_conn.py @@ -0,0 +1,142 @@ +import pytest +from .utils import random_csv_filenames +from src.adapters.fetch_filenames.rabbitmq import RabbitMQFetchFilenamesClient +from src.deployments.script.config import RabbitMQConfig +import pika +from pytest import MonkeyPatch + + +@pytest.mark.smoke +def test_fetch_failed_conn( + rabbitmq_fetch_filenames_client: RabbitMQFetchFilenamesClient, + monkeypatch: MonkeyPatch, +): + def mocked_failed_conn( + self, + *args, + **kwargs, + ) -> None: + raise Exception("Failed to connect") + + monkeypatch.setattr(pika.BlockingConnection, "__init__", mocked_failed_conn) + + monkeypatch.setattr(RabbitMQFetchFilenamesClient, "_reset_conn", mocked_failed_conn) + + with pytest.raises(Exception, match="^Failed to connect$"): + next(rabbitmq_fetch_filenames_client.fetch()) + + monkeypatch.undo() + monkeypatch.undo() + + 
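For context on what these connection-failure tests drive: the fetch() generator rewritten earlier in this patch polls the queue with basic_get and gives up once nothing has arrived for polling_timeout seconds. A condensed, illustrative sketch of that loop is below; the 0.5 s back-off and variable names follow the adapter shown above, while error handling and message requeueing are omitted:

from datetime import datetime
from typing import Generator
import time

def poll_filenames(channel, queue: str, polling_timeout: float) -> Generator[str, None, None]:
    # Keep polling with basic_get; stop once the queue has been idle for the
    # whole polling_timeout window.
    last_poll_time = None
    while True:
        method, properties, body = channel.basic_get(queue=queue, auto_ack=False)
        if method is None and properties is None and body is None:
            if last_poll_time is None:
                last_poll_time = datetime.now()
            if (datetime.now() - last_poll_time).total_seconds() > polling_timeout:
                break
            time.sleep(0.5)
            continue
        last_poll_time = None
        yield body.decode()
        channel.basic_ack(delivery_tag=method.delivery_tag)

Setting polling_timeout=0, as the no-wait fixture above does, makes the generator drain whatever is queued and then stop after a single idle poll, which is what lets these tests iterate fetch() to completion.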
+@pytest.mark.smoke +def test_fetch_wrong_credentials( + monkeypatch: MonkeyPatch, +): + rabbitmq_fetch_filenames_client = RabbitMQFetchFilenamesClient( + host=RabbitMQConfig.HOST, + port=RabbitMQConfig.PORT, + credentials_service=lambda: ("wrong", "wrong"), + queue=RabbitMQConfig.QUEUE, + polling_timeout=RabbitMQConfig.POLLING_TIMEOUT, + ) + + def mocked_failed_conn( + self, + *args, + **kwargs, + ) -> None: + raise Exception("Failed to connect") + + monkeypatch.setattr(RabbitMQFetchFilenamesClient, "_reset_conn", mocked_failed_conn) + + with pytest.raises(Exception, match="^Failed to connect$"): + next(rabbitmq_fetch_filenames_client.fetch()) + + monkeypatch.undo() + + +@pytest.mark.slow +@pytest.mark.smoke +def test_publish_single_wrong_host( + monkeypatch: MonkeyPatch, +): + rabbitmq_fetch_filenames_client = RabbitMQFetchFilenamesClient( + host="wrong", + port=RabbitMQConfig.PORT, + credentials_service=lambda: (RabbitMQConfig.USERNAME, RabbitMQConfig.PASSWORD), + queue=RabbitMQConfig.QUEUE, + polling_timeout=RabbitMQConfig.POLLING_TIMEOUT, + ) + + def mocked_failed_conn( + self, + *args, + **kwargs, + ) -> None: + raise Exception("Failed to connect") + + monkeypatch.setattr(RabbitMQFetchFilenamesClient, "_reset_conn", mocked_failed_conn) + + with pytest.raises(Exception, match="^Failed to connect$") as e: + next(rabbitmq_fetch_filenames_client.fetch()) + + monkeypatch.undo() + + +@pytest.mark.slow +def test_fetch_failed_conn_reset_conn( + rabbitmq_fetch_filenames_no_wait_client: RabbitMQFetchFilenamesClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + monkeypatch: MonkeyPatch, +): + conn, queue = raw_rabbitmq_pika_conn_config + + channel = conn.channel() + + channel.queue_declare(queue=queue, durable=True) + + first_published_filename = random_csv_filenames()[0] + second_published_filename = random_csv_filenames()[1] + + channel.basic_publish( + exchange="", + routing_key=rabbitmq_fetch_filenames_no_wait_client._queue, + body=first_published_filename, + properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), + ) + + for i, filename in enumerate(rabbitmq_fetch_filenames_no_wait_client.fetch()): + if i == 0: + assert rabbitmq_fetch_filenames_no_wait_client._conn is not None + conn = rabbitmq_fetch_filenames_no_wait_client._conn + + assert filename == first_published_filename + channel.basic_publish( + exchange="", + routing_key=rabbitmq_fetch_filenames_no_wait_client._queue, + body=second_published_filename, + properties=pika.BasicProperties( + delivery_mode=pika.DeliveryMode.Persistent + ), + ) + + counter = 0 + + def mock_failed_fetch( + self, + *args, + **kwargs, + ) -> None: + nonlocal counter + + if counter == 0: + counter += 1 + monkeypatch.undo() + raise Exception("Failed to fetch!") + + monkeypatch.setattr(pika.channel.Channel, "basic_get", mock_failed_fetch) + if i == 1: + assert filename == second_published_filename + assert rabbitmq_fetch_filenames_no_wait_client._conn is not None + assert rabbitmq_fetch_filenames_no_wait_client._conn != conn diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_failed_fetch.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_failed_fetch.py new file mode 100644 index 0000000..4e8aec1 --- /dev/null +++ b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_failed_fetch.py @@ -0,0 +1,98 @@ +import pytest +from .utils import random_csv_filenames +from src.adapters.fetch_filenames.rabbitmq import RabbitMQFetchFilenamesClient +import pika 
+import pytest +from pytest import LogCaptureFixture, MonkeyPatch + + +@pytest.mark.smoke +@pytest.mark.parametrize("filename", random_csv_filenames()) +def test_fetch_single_exception_resilience( + rabbitmq_fetch_filenames_no_wait_client: RabbitMQFetchFilenamesClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + filename: str, + monkeypatch: MonkeyPatch, + caplog: LogCaptureFixture, +): + conn, queue = raw_rabbitmq_pika_conn_config + + channel = conn.channel() + channel.queue_declare( + queue=queue, + durable=True, + ) + + counter = 0 + + def mock_failed_fetch( + self, + *args, + **kwargs, + ) -> None: + nonlocal counter + + if counter == 0: + counter += 1 + monkeypatch.undo() + raise Exception("Failed to fetch!") + + monkeypatch.setattr(pika.channel.Channel, "basic_get", mock_failed_fetch) + with caplog.at_level("ERROR"): + for fetched_filename in rabbitmq_fetch_filenames_no_wait_client.fetch(): + assert fetched_filename == filename + assert "Failed to fetch!" in caplog.text + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "filenames", + [random_csv_filenames() for _ in range(5)], +) +def test_fetch_batch_exception_resilience( + rabbitmq_fetch_filenames_no_wait_client: RabbitMQFetchFilenamesClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + filenames: list[str], + monkeypatch: MonkeyPatch, + caplog: LogCaptureFixture, +): + conn, queue = raw_rabbitmq_pika_conn_config + + channel = conn.channel() + channel.queue_declare( + queue=queue, + durable=True, + ) + + for filename in filenames: + channel.basic_publish( + exchange="", + routing_key=rabbitmq_fetch_filenames_no_wait_client._queue, + body=filename, + properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), + ) + + counter = 0 + + def mock_failed_fetch( + self, + *args, + **kwargs, + ) -> None: + nonlocal counter + + if counter == 0: + counter += 1 + monkeypatch.undo() + raise Exception("Failed to fetch!") + + monkeypatch.setattr(pika.channel.Channel, "basic_get", mock_failed_fetch) + + all_filenames = [] + + with caplog.at_level("ERROR"): + for fetched_filename in rabbitmq_fetch_filenames_no_wait_client.fetch(): + all_filenames.append(fetched_filename) + assert "Failed to fetch!" 
in caplog.text + + assert sorted(all_filenames) == sorted(filenames) diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_poll_until_timeout.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_poll_until_timeout.py new file mode 100644 index 0000000..6039dab --- /dev/null +++ b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_poll_until_timeout.py @@ -0,0 +1,46 @@ +import time +import pytest +from .utils import random_csv_filenames +from src.adapters.fetch_filenames.rabbitmq import RabbitMQFetchFilenamesClient +import pika +import pytest + + +@pytest.mark.smoke +@pytest.mark.parametrize("timeout", [0.5 * i for i in range(1, 5)]) +def test_fetch_none_wait_timeout( + rabbitmq_fetch_filenames_client: RabbitMQFetchFilenamesClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + timeout: int, +): + new_rabbitmq_fetch_filenames_client = RabbitMQFetchFilenamesClient( + host=rabbitmq_fetch_filenames_client._host, + port=rabbitmq_fetch_filenames_client._port, + credentials_service=rabbitmq_fetch_filenames_client._credentials_service, + queue=rabbitmq_fetch_filenames_client._queue, + polling_timeout=timeout, + ) + + conn, queue = raw_rabbitmq_pika_conn_config + + channel = conn.channel() + + channel.queue_declare(queue=queue, durable=True) + + filename = random_csv_filenames()[0] + + channel.basic_publish( + exchange="", + routing_key=queue, + body=filename, + properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), + ) + + start_time = time.perf_counter() + + for fetched_filename in new_rabbitmq_fetch_filenames_client.fetch(): + assert fetched_filename == filename + + end_time = time.perf_counter() + + assert end_time - start_time >= timeout diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_successful_fetch.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_successful_fetch.py new file mode 100644 index 0000000..8fdeb4c --- /dev/null +++ b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_successful_fetch.py @@ -0,0 +1,60 @@ +import pytest +from .utils import random_csv_filenames +from src.adapters.fetch_filenames.rabbitmq import RabbitMQFetchFilenamesClient +import pika +import pytest + + +@pytest.mark.smoke +@pytest.mark.parametrize("filename", random_csv_filenames()) +def test_fetch_single_success( + rabbitmq_fetch_filenames_no_wait_client: RabbitMQFetchFilenamesClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + filename: str, +): + conn, queue = raw_rabbitmq_pika_conn_config + + channel = conn.channel() + + channel.queue_declare(queue=queue, durable=True) + + channel.basic_publish( + exchange="", + routing_key=rabbitmq_fetch_filenames_no_wait_client._queue, + body=filename, + properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), + ) + + for fetched_filename in rabbitmq_fetch_filenames_no_wait_client.fetch(): + assert fetched_filename == filename + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "filenames", + [random_csv_filenames() for _ in range(5)], +) +def test_publish_batch_success( + rabbitmq_fetch_filenames_no_wait_client: RabbitMQFetchFilenamesClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + filenames: list[str], +): + conn, queue = raw_rabbitmq_pika_conn_config + + channel = conn.channel() + + channel.queue_declare(queue=queue, durable=True) + + for filename in filenames: + channel.basic_publish( + exchange="", + 
routing_key=queue, + body=filename, + properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), + ) + + all_filenames = [] + for filename in rabbitmq_fetch_filenames_no_wait_client.fetch(): + all_filenames.append(filename) + + assert sorted(all_filenames) == sorted(filenames) diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/utils.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/utils.py new file mode 100644 index 0000000..8cc804a --- /dev/null +++ b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/utils.py @@ -0,0 +1,8 @@ +import random +import string + + +def random_csv_filenames() -> list[str]: + return [ + "".join(random.choices(string.ascii_letters, k=10)) + ".csv" for _ in range(5) + ] diff --git a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/conftest.py b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/conftest.py index 6e1d551..4131138 100644 --- a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/conftest.py +++ b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/conftest.py @@ -1,5 +1,5 @@ from src.adapters.upsert_iot_records.postgres import PostgresUpsertIOTRecordsClient -from src.deployments.scripts.config import PostgresConfig +from src.deployments.script.config import PostgresConfig import psycopg2 import pytest diff --git a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_conn.py b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_conn.py index 2edb0ad..7089aa3 100644 --- a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_conn.py +++ b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_conn.py @@ -1,6 +1,6 @@ import pytest from src.adapters.upsert_iot_records.postgres import PostgresUpsertIOTRecordsClient -from src.deployments.scripts.config import PostgresConfig +from src.deployments.script.config import PostgresConfig from src.entities import IOTRecord import psycopg2 from .utils import random_iot_records, MockedPostgresConnection @@ -24,9 +24,8 @@ def mocked_failed_conn( monkeypatch.setattr(psycopg2, "connect", mocked_failed_conn) - with pytest.raises(Exception) as e: + with pytest.raises(Exception, match="^Failed to connect$"): assert not postgres_upsert_iot_records_client.upsert(iot_record) - assert e.value == "Failed to connect" with raw_postgres_psycopg2_conn_config.cursor() as cursor: cursor.execute( @@ -65,9 +64,8 @@ def mocked_failed_conn( monkeypatch.setattr(psycopg2, "connect", mocked_failed_conn) - with pytest.raises(Exception) as e: + with pytest.raises(Exception, match="^Failed to connect$"): assert not any(postgres_upsert_iot_records_client.upsert(iot_records)) - assert e.value == "Failed to connect" with raw_postgres_psycopg2_conn_config.cursor() as cursor: stmt = """ @@ -105,9 +103,8 @@ def test_upsert_single_wrong_credentials( batch_upsert_size=1, ) - with pytest.raises(Exception) as e: + with pytest.raises(Exception, match="^.*403.*ACCESS_REFISED.*$"): assert not postgres_upsert_iot_records_client.upsert(iot_record) - assert "ACCESS_REFUSED" in e.value and "403" in e.value with raw_postgres_psycopg2_conn_config.cursor() as cursor: cursor.execute( @@ -141,9 +138,8 @@ def test_upsert_single_wrong_host( batch_upsert_size=1, ) - with pytest.raises(Exception) as e: + with pytest.raises(Exception, match="^.*403.*ACCESS_REFUSED.*$"): assert not 
postgres_upsert_iot_records_client.upsert(iot_record) - assert "ACCESS_REFUSED" in e.value and "403" in e.value with raw_postgres_psycopg2_conn_config.cursor() as cursor: cursor.execute( diff --git a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_upsert.py b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_upsert.py index a5f5425..12b7071 100644 --- a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_upsert.py +++ b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_upsert.py @@ -20,9 +20,8 @@ def test_upsert_single_failed( psycopg2, "connect", lambda *args, **kwargs: MockedPostgresConnection() ) - with pytest.raises(Exception) as e: + with pytest.raises(Exception, match="^Failed to execute!$"): assert not postgres_upsert_iot_records_client.upsert(iot_record) - assert e.value == "Failed to execute!" with raw_postgres_psycopg2_conn_config.cursor() as cursor: cursor.execute( @@ -56,9 +55,8 @@ def test_upsert_batch_failed( psycopg2, "connect", lambda *args, **kwargs: MockedPostgresConnection() ) - with pytest.raises(Exception) as e: + with pytest.raises(Exception, match="^Failed to execute!$"): assert not any(postgres_upsert_iot_records_client.upsert(iot_records)) - assert e.value == "Failed to execute!" with raw_postgres_psycopg2_conn_config.cursor() as cursor: stmt = """ @@ -141,13 +139,12 @@ def mocked_partially_failed_upsert( MockedPostgresCursor, "executemany", mocked_partially_failed_upsert ) - with pytest.raises(Exception) as e: + with pytest.raises(Exception, match="^Failed to execute!"): upsert_successes = new_postgres_upsert_iot_records_client.upsert(iot_records) assert not all(upsert_successes) assert any(upsert_successes) assert upsert_successes[2] == False - assert e.value == "Failed to execute!" 
successful_records = [ iot_record diff --git a/producer/tests/test_adapters/test_publish_filenames/conftest.py b/producer/tests/test_adapters/test_publish_filenames/conftest.py deleted file mode 100644 index e69de29..0000000 diff --git a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_conn.py b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_conn.py index 7e82051..6b7dd0e 100644 --- a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_conn.py +++ b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_conn.py @@ -3,7 +3,7 @@ from src.adapters.publish_filenames.rabbitmq import RabbitMQPublishFilenamesClient from src.deployments.script.config import RabbitMQConfig import pika -from pytest import MonkeyPatch +from pytest import LogCaptureFixture, MonkeyPatch @pytest.mark.smoke @@ -13,6 +13,7 @@ def test_publish_single_failed_conn( raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], filename: str, monkeypatch: MonkeyPatch, + caplog: LogCaptureFixture, ): def mocked_failed_conn( self, @@ -23,9 +24,9 @@ def mocked_failed_conn( monkeypatch.setattr(pika.BlockingConnection, "__init__", mocked_failed_conn) - with pytest.raises(Exception) as e: + with caplog.at_level("ERROR"): assert not rabbitmq_publish_filenames_client.publish(filename) - assert e.value == "Failed to connect" + assert "Failed to connect" in caplog.text pika_conn, queue = raw_rabbitmq_pika_conn_config @@ -45,6 +46,7 @@ def test_publish_batch_failed_conn( raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], filenames: list[str], monkeypatch: MonkeyPatch, + caplog: LogCaptureFixture, ): def mocked_failed_conn( self, @@ -55,9 +57,9 @@ def mocked_failed_conn( monkeypatch.setattr(pika.BlockingConnection, "__init__", mocked_failed_conn) - with pytest.raises(Exception) as e: + with caplog.at_level("ERROR"): assert not any(rabbitmq_publish_filenames_client.publish(filenames)) - assert e.value == "Failed to connect" + assert "Failed to connect" in caplog.text pika_conn, queue = raw_rabbitmq_pika_conn_config @@ -73,6 +75,7 @@ def mocked_failed_conn( def test_publish_single_wrong_credentials( raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], filename: str, + caplog: LogCaptureFixture, ): rabbitmq_publish_filenames_client = RabbitMQPublishFilenamesClient( host=RabbitMQConfig.HOST, @@ -81,9 +84,9 @@ def test_publish_single_wrong_credentials( queue=RabbitMQConfig.QUEUE, ) - with pytest.raises(Exception) as e: + with caplog.at_level("ERROR"): assert not rabbitmq_publish_filenames_client.publish(filename) - assert "ACCESS_REFUSED" in e.value and "403" in e.value + assert "ACCESS_REFUSED" in caplog.text and "403" in caplog.text pika_conn, queue = raw_rabbitmq_pika_conn_config channel = pika_conn.channel() @@ -98,6 +101,7 @@ def test_publish_single_wrong_credentials( def test_publish_single_wrong_host( raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], filename: str, + caplog: LogCaptureFixture, ): rabbitmq_publish_filenames_client = RabbitMQPublishFilenamesClient( host="wrong", @@ -106,9 +110,9 @@ def test_publish_single_wrong_host( queue=RabbitMQConfig.QUEUE, ) - with pytest.raises(Exception) as e: + with caplog.at_level("ERROR"): assert not rabbitmq_publish_filenames_client.publish(filename) - assert "ACCESS_REFUSED" in e.value and "403" in e.value + assert "Name or service not known" in caplog.text pika_conn, queue = raw_rabbitmq_pika_conn_config channel = pika_conn.channel() diff --git 
a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_publish.py b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_publish.py index 0750723..ff4cd2b 100644 --- a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_publish.py +++ b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_publish.py @@ -3,7 +3,7 @@ from src.adapters.publish_filenames.rabbitmq import RabbitMQPublishFilenamesClient import pika import pytest -from pytest import MonkeyPatch +from pytest import LogCaptureFixture, MonkeyPatch @pytest.mark.smoke @@ -13,10 +13,11 @@ def test_publish_single_failed( rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], filename: str, + caplog: LogCaptureFixture, ): - with pytest.raises(Exception) as e: + with caplog.at_level("ERROR"): assert not rabbitmq_publish_filenames_client.publish(filename) - assert e.value == "Failed to publish" + assert "Failed to publish" in caplog.text pika_conn, queue = raw_rabbitmq_pika_conn_config @@ -36,10 +37,11 @@ def test_publish_batch_failed( rabbitmq_publish_filenames_client: RabbitMQPublishFilenamesClient, raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], filenames: list[str], + caplog: LogCaptureFixture, ): - with pytest.raises(Exception) as e: + with caplog.at_level("ERROR"): assert not any(rabbitmq_publish_filenames_client.publish(filenames)) - assert e.value == "Failed to publish" + assert "Failed to publish" in caplog.text pika_conn, queue = raw_rabbitmq_pika_conn_config @@ -59,6 +61,7 @@ def test_publish_batch_partial_failed( raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], filenames: list[str], monkeypatch: MonkeyPatch, + caplog: LogCaptureFixture, ): counter = 0 @@ -94,7 +97,7 @@ def mocked_partially_failed_basic_publish( mocked_partially_failed_basic_publish, ) - with pytest.raises(Exception) as e: + with caplog.at_level("ERROR"): publish_successes = rabbitmq_publish_filenames_client.publish(filenames) successes_filenames = [ @@ -105,7 +108,7 @@ def mocked_partially_failed_basic_publish( assert not all(publish_successes) assert any(publish_successes) assert publish_successes[2] == False - assert e.value == "Failed to publish" + assert "Failed to publish" in caplog.text pika_conn, queue = raw_rabbitmq_pika_conn_config diff --git a/producer/tests/test_deployments/test_main/test_main_function_failed.py b/producer/tests/test_deployments/test_main/test_main_function_failed.py index 27a18e9..aa708c5 100644 --- a/producer/tests/test_deployments/test_main/test_main_function_failed.py +++ b/producer/tests/test_deployments/test_main/test_main_function_failed.py @@ -36,8 +36,7 @@ def test_main_flow_has_failed_files( "src.adapters.publish_filenames.rabbitmq.RabbitMQPublishFilenamesClient.publish", lambda self, filename: False, ) - caplog.at_level("CRITICAL") - with pytest.raises(Exception) as e: - main() - assert "Failed to publish filenames" in str(e.value) - assert "Failed to publish filenames" in caplog.text + with caplog.at_level("ERROR"): + with pytest.raises(Exception, match="^Failed to publish filenames.*$"): + main() + assert "Failed to publish filenames" in caplog.text From b4d0c70c3d8bcc0b534377d6c114ad622caa7e00 Mon Sep 17 00:00:00 2001 From: alexau Date: Sun, 3 Dec 2023 02:20:46 +0800 Subject: [PATCH 29/36] Updated the tests --- .coverage_consumer | Bin 53248 -> 0 bytes Makefile | 2 +- .../test_postgres/test_failed_conn.py | 20 
++++++------ .../test_postgres/test_failed_upsert.py | 16 +++++++++----- .../test_rabbitmq/test_failed_conn.py | 5 +---- 5 files changed, 26 insertions(+), 17 deletions(-) delete mode 100644 .coverage_consumer diff --git a/.coverage_consumer b/.coverage_consumer deleted file mode 100644 index ee56acd7969777dee86fa47e40f84fd9ee7436ae..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 53248 [base85-encoded binary data of the deleted coverage file omitted] None: @@ -24,8 +24,9 @@ def mocked_failed_conn( monkeypatch.setattr(psycopg2, "connect", mocked_failed_conn) - with pytest.raises(Exception, match="^Failed to connect$"): + with caplog.at_level("ERROR"): assert not postgres_upsert_iot_records_client.upsert(iot_record) + assert "Failed to connect" in caplog.text with raw_postgres_psycopg2_conn_config.cursor() as cursor: cursor.execute( @@ -54,9 +55,9 @@ def test_upsert_batch_failed_conn( raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, iot_records: list[IOTRecord], monkeypatch: MonkeyPatch, + caplog: LogCaptureFixture, ): def mocked_failed_conn( - self, *args, **kwargs, ) -> None: @@ -64,8 +65,9 @@ def mocked_failed_conn( monkeypatch.setattr(psycopg2, "connect", mocked_failed_conn) - with pytest.raises(Exception, match="^Failed to connect$"): + with caplog.at_level("ERROR"): assert not any(postgres_upsert_iot_records_client.upsert(iot_records)) + assert "Failed to connect" in caplog.text with raw_postgres_psycopg2_conn_config.cursor() as cursor: stmt = """ @@ -94,6 +96,7 @@ def mocked_failed_conn( def test_upsert_single_wrong_credentials( raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, iot_record: IOTRecord, + caplog: LogCaptureFixture, ): postgres_upsert_iot_records_client = PostgresUpsertIOTRecordsClient( host=PostgresConfig.HOST, @@ -103,8 +106,9 @@ def test_upsert_single_wrong_credentials( batch_upsert_size=1, ) - with pytest.raises(Exception, match="^.*403.*ACCESS_REFISED.*$"): + with caplog.at_level("ERROR"): assert not postgres_upsert_iot_records_client.upsert(iot_record) + assert "ERROR" in
caplog.text with raw_postgres_psycopg2_conn_config.cursor() as cursor: cursor.execute( @@ -129,6 +133,7 @@ def test_upsert_single_wrong_credentials( def test_upsert_single_wrong_host( raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, iot_record: IOTRecord, + caplog: LogCaptureFixture, ): postgres_upsert_iot_records_client = PostgresUpsertIOTRecordsClient( host="wrong", @@ -138,8 +143,9 @@ def test_upsert_single_wrong_host( batch_upsert_size=1, ) - with pytest.raises(Exception, match="^.*403.*ACCESS_REFUSED.*$"): + with caplog.at_level("ERROR"): assert not postgres_upsert_iot_records_client.upsert(iot_record) + assert "ERROR" in caplog.text with raw_postgres_psycopg2_conn_config.cursor() as cursor: cursor.execute( diff --git a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_upsert.py b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_upsert.py index 12b7071..01555dc 100644 --- a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_upsert.py +++ b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/test_failed_upsert.py @@ -5,7 +5,7 @@ import pytest from src.entities import IOTRecord import psycopg2 -from pytest import MonkeyPatch +from pytest import MonkeyPatch, LogCaptureFixture @pytest.mark.smoke @@ -15,13 +15,15 @@ def test_upsert_single_failed( raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, iot_record: IOTRecord, monkeypatch: MonkeyPatch, + caplog: LogCaptureFixture, ): monkeypatch.setattr( psycopg2, "connect", lambda *args, **kwargs: MockedPostgresConnection() ) - with pytest.raises(Exception, match="^Failed to execute!$"): + with caplog.at_level("ERROR"): assert not postgres_upsert_iot_records_client.upsert(iot_record) + assert "Failed to execute!" in caplog.text with raw_postgres_psycopg2_conn_config.cursor() as cursor: cursor.execute( @@ -50,13 +52,15 @@ def test_upsert_batch_failed( raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, iot_records: list[IOTRecord], monkeypatch: MonkeyPatch, + caplog: LogCaptureFixture, ): monkeypatch.setattr( psycopg2, "connect", lambda *args, **kwargs: MockedPostgresConnection() ) - with pytest.raises(Exception, match="^Failed to execute!$"): - assert not any(postgres_upsert_iot_records_client.upsert(iot_records)) + with caplog.at_level("ERROR"): + assert not all(postgres_upsert_iot_records_client.upsert(iot_records)) + assert "Failed to execute!" in caplog.text with raw_postgres_psycopg2_conn_config.cursor() as cursor: stmt = """ @@ -90,6 +94,7 @@ def test_upsert_batch_partial_failed( raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, iot_records: list[IOTRecord], monkeypatch: MonkeyPatch, + caplog: LogCaptureFixture, ): new_postgres_upsert_iot_records_client = PostgresUpsertIOTRecordsClient( host=postgres_upsert_iot_records_client._host, @@ -139,12 +144,13 @@ def mocked_partially_failed_upsert( MockedPostgresCursor, "executemany", mocked_partially_failed_upsert ) - with pytest.raises(Exception, match="^Failed to execute!"): + with caplog.at_level("ERROR"): upsert_successes = new_postgres_upsert_iot_records_client.upsert(iot_records) assert not all(upsert_successes) assert any(upsert_successes) assert upsert_successes[2] == False + assert "Failed to execute!" 
in caplog.text successful_records = [ iot_record diff --git a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_conn.py b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_conn.py index 6b7dd0e..46f5819 100644 --- a/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_conn.py +++ b/producer/tests/test_adapters/test_publish_filenames/test_rabbitmq/test_failed_conn.py @@ -101,7 +101,6 @@ def test_publish_single_wrong_credentials( def test_publish_single_wrong_host( raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], filename: str, - caplog: LogCaptureFixture, ): rabbitmq_publish_filenames_client = RabbitMQPublishFilenamesClient( host="wrong", @@ -110,9 +109,7 @@ def test_publish_single_wrong_host( queue=RabbitMQConfig.QUEUE, ) - with caplog.at_level("ERROR"): - assert not rabbitmq_publish_filenames_client.publish(filename) - assert "Name or service not known" in caplog.text + assert not rabbitmq_publish_filenames_client.publish(filename) pika_conn, queue = raw_rabbitmq_pika_conn_config channel = pika_conn.channel() From bcb7b188ad849440a79b0e026ef372c9341a0463 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 2 Dec 2023 18:22:18 +0000 Subject: [PATCH 30/36] Updated coverage.svg --- coverage.svg | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/coverage.svg b/coverage.svg index a8c7e72..3438732 100644 --- a/coverage.svg +++ b/coverage.svg @@ -9,13 +9,13 @@ - + coverage coverage - 92% - 92% + 97% + 97% From 08d3cd557972d08f4e1e2015ee044809fdec1591 Mon Sep 17 00:00:00 2001 From: alexau Date: Sun, 3 Dec 2023 13:07:45 +0800 Subject: [PATCH 31/36] changing the names for better consistency --- .env | 3 +- .github/workflows/test.yml | 23 +- Makefile | 2 + .../src/adapters/fetch_filenames/rabbitmq.py | 1 - .../adapters/file_parse_iot_records/csv.py | 41 ++-- consumer/src/deployments/script/config.py | 3 +- consumer/src/deployments/script/main.py | 1 + .../test_rabbitmq/test_failed_fetch.py | 50 ++++ .../test_csv/conftest.py | 87 +++++++ .../test_csv/test_close.py | 26 ++ .../test_csv/test_failed_open_file.py | 110 +++++++++ .../test_failed_other_file_formats.py | 113 +++++++++ .../test_csv/test_failed_parse.py | 223 ++++++++++++++++++ .../test_csv/test_successful_parse.py | 61 +++++ .../test_csv/utils.py | 136 +++++++++++ .../test_postgres/conftest.py | 6 +- database/assets/create_records_table.sql | 6 +- docker-compose.yml | 1 + producer/src/deployments/script/config.py | 34 +-- producer/src/usecases/publish_filenames.py | 2 +- 20 files changed, 884 insertions(+), 45 deletions(-) create mode 100644 consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_close.py create mode 100644 consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_open_file.py create mode 100644 consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_other_file_formats.py create mode 100644 consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_parse.py create mode 100644 consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_successful_parse.py create mode 100644 consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/utils.py diff --git a/.env b/.env index 0b24f8e..750e8fd 100644 --- a/.env +++ b/.env @@ -12,7 +12,7 @@ RABBITMQ_PORT=5672 RABBITMQ_WEBAPP_PORT=15672 RABBITMQ_POLLING_TIMEOUT=60 -QUEUE_NAME=filenames +RABBITMQ_QUEUE_NAME=filenames AMAZON_LINUX_VERSION_TAG=2023.2.20231113.0 @@ 
-35,3 +35,4 @@ CONSUMER_LOG_ROTATION=midnight CSV_PARSER_RECOGNIZED_DATETIME_FORMATS="%Y-%m-%dT%H:%M:%S.%f%z" CSV_PARSER_DELIMITER="," +CSV_PARSER_FILE_EXTENSION=.csv diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 51fb722..1b81d3d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,6 +7,8 @@ jobs: load-dotenv: runs-on: ubuntu-latest outputs: + target-file-dir: ${{ steps.load-dotenv.outputs.TARGET_FILE_DIR }} + target-file-extension: ${{ steps.load-dotenv.outputs.TARGET_FILE_EXTENSION }} postgres-version-tag: ${{ steps.load-dotenv.outputs.POSTGRES_VERSION_TAG }} postgres-port: ${{ steps.load-dotenv.outputs.POSTGRES_PORT }} postgres-username: ${{ steps.load-dotenv.outputs.POSTGRES_USERNAME }} @@ -16,7 +18,10 @@ jobs: rabbitmq-port: ${{ steps.load-dotenv.outputs.RABBITMQ_PORT }} rabbitmq-username: ${{ steps.load-dotenv.outputs.RABBITMQ_USERNAME }} rabbitmq-password: ${{ steps.load-dotenv.outputs.RABBITMQ_PASSWORD }} - queue-name: ${{ steps.load-dotenv.outputs.QUEUE_NAME }} + rabbitmq-queue-name: ${{ steps.load-dotenv.outputs.RABBITMQ_QUEUE_NAME }} + csv-parser-recognized-datetime-formats: ${{ steps.load-dotenv.outputs.CSV_PARSER_RECOGNIZED_DATETIME_FORMATS }} + csv-parser-delimiter: ${{ steps.load-dotenv.outputs.CSV_PARSER_DELIMITER }} + csv-parser-file-extension: ${{ steps.load-dotenv.outputs.CSV_PARSER_FILE_EXTENSION }} steps: - name: Checkout uses: actions/checkout@v4 @@ -26,6 +31,8 @@ jobs: set -o allexport source .env set +o allexport + echo "TARGET_FILE_DIR=$TARGET_FILE_DIR" >> $GITHUB_OUTPUT + echo "TARGET_FILE_EXTENSION=$TARGET_FILE_EXTENSION" >> $GITHUB_OUTPUT echo "POSTGRES_VERSION_TAG=$POSTGRES_VERSION_TAG" >> $GITHUB_OUTPUT echo "POSTGRES_PORT=$POSTGRES_PORT" >> $GITHUB_OUTPUT echo "POSTGRES_USERNAME=$POSTGRES_USERNAME" >> $GITHUB_OUTPUT @@ -35,7 +42,10 @@ jobs: echo "RABBITMQ_PORT=$RABBITMQ_PORT" >> $GITHUB_OUTPUT echo "RABBITMQ_USERNAME=$RABBITMQ_USERNAME" >> $GITHUB_OUTPUT echo "RABBITMQ_PASSWORD=$RABBITMQ_PASSWORD" >> $GITHUB_OUTPUT - echo "QUEUE_NAME=$QUEUE_NAME" >> $GITHUB_OUTPUT + echo "RABBITMQ_QUEUE_NAME=$RABBITMQ_QUEUE_NAME" >> $GITHUB_OUTPUT + echo "CSV_PARSER_RECOGNIZED_DATETIME_FORMATS=$CSV_PARSER_RECOGNIZED_DATETIME_FORMATS" >> $GITHUB_OUTPUT + echo "CSV_PARSER_DELIMITER=$CSV_PARSER_DELIMITER" >> $GITHUB_OUTPUT + echo "CSV_PARSER_FILE_EXTENSION=$CSV_PARSER_FILE_EXTENSION" >> $GITHUB_OUTPUT test-producer: needs: load-dotenv runs-on: ubuntu-latest @@ -90,7 +100,9 @@ jobs: RABBITMQ_PORT: ${{ needs.load-dotenv.outputs.rabbitmq-port }} RABBITMQ_USERNAME: ${{ needs.load-dotenv.outputs.rabbitmq-username }} RABBITMQ_PASSWORD: ${{ needs.load-dotenv.outputs.rabbitmq-password }} - QUEUE_NAME: ${{ needs.load-dotenv.outputs.queue-name }} + RABBITMQ_QUEUE_NAME: ${{ needs.load-dotenv.outputs.rabbitmq-queue-name }} + TARGET_FILE_DIR: ${{ needs.load-dotenv.outputs.target-file-dir }} + TARGET_FILE_EXTENSION: ${{ needs.load-dotenv.outputs.target-file-extension }} - name: Output coverage file id: output-coverage-file run: | @@ -168,7 +180,10 @@ jobs: RABBITMQ_PORT: ${{ needs.load-dotenv.outputs.rabbitmq-port }} RABBITMQ_USERNAME: ${{ needs.load-dotenv.outputs.rabbitmq-username }} RABBITMQ_PASSWORD: ${{ needs.load-dotenv.outputs.rabbitmq-password }} - QUEUE_NAME: ${{ needs.load-dotenv.outputs.queue-name }} + RABBITMQ_QUEUE_NAME: ${{ needs.load-dotenv.outputs.rabbitmq-queue-name }} + CSV_PARSER_RECOGNIZED_DATETIME_FORMATS: ${{ needs.load-dotenv.outputs.csv-parser-recognized-datetime-formats }} + CSV_PARSER_DELIMITER: ${{ 
needs.load-dotenv.outputs.csv-parser-delimiter }} + CSV_PARSER_FILE_EXTENSION: ${{ needs.load-dotenv.outputs.csv-parser-file-extension }} - name: Output coverage file id: output-coverage-file run: | diff --git a/Makefile b/Makefile index 3411d53..86d53ca 100644 --- a/Makefile +++ b/Makefile @@ -43,6 +43,8 @@ test_consumer: export RABBITMQ_USERNAME=$(RABBITMQ_USERNAME) && \ export RABBITMQ_PASSWORD=$(RABBITMQ_PASSWORD) && \ export QUEUE_NAME=$(QUEUE_NAME) && \ + export CSV_PARSER_RECOGNIZED_DATETIME_FORMATS=$(CSV_PARSER_RECOGNIZED_DATETIME_FORMATS) && \ + export CSV_PARSER_DELIMITER=$(CSV_PARSER_DELIMITER) && \ COVERAGE_FILE=.coverage_consumer coverage run -m pytest -vxs consumer/tests coverage_report: coverage combine .coverage_producer .coverage_consumer && \ diff --git a/consumer/src/adapters/fetch_filenames/rabbitmq.py b/consumer/src/adapters/fetch_filenames/rabbitmq.py index c166d67..fe858bc 100644 --- a/consumer/src/adapters/fetch_filenames/rabbitmq.py +++ b/consumer/src/adapters/fetch_filenames/rabbitmq.py @@ -62,7 +62,6 @@ def fetch(self) -> Generator[str, None, None]: method, properties, body = channel.basic_get( queue=self._queue, auto_ack=False ) - if method is None and properties is None and body is None: if self._last_poll_time is None: self._last_poll_time = datetime.now() diff --git a/consumer/src/adapters/file_parse_iot_records/csv.py b/consumer/src/adapters/file_parse_iot_records/csv.py index 072fb20..e38d0ca 100644 --- a/consumer/src/adapters/file_parse_iot_records/csv.py +++ b/consumer/src/adapters/file_parse_iot_records/csv.py @@ -1,6 +1,7 @@ from concurrent.futures import ThreadPoolExecutor from datetime import datetime from decimal import Decimal +from decimal import InvalidOperation from typing import Iterator, Optional, overload, Sequence from typing_extensions import override from ...entities import IOTRecord @@ -14,9 +15,11 @@ def __init__( self, recognized_datetime_formats: Sequence[str], delimiter: str = ",", + file_extension: str = ".csv", ) -> None: self._delimiter = delimiter self._recognized_datetime_formats = recognized_datetime_formats + self._file_extension = file_extension @overload def parse(self, filename: str) -> list[IOTRecord]: @@ -37,10 +40,13 @@ def parse( @override def parse_stream(self, filename: str) -> Iterator[IOTRecord]: try: + if not filename.endswith(self._file_extension): + raise ValueError(f"File extension must be {self._file_extension}") with open(filename) as csvfile: - reader = csv.reader(csvfile, delimiter=self._delimiter) + reader = csv.reader(csvfile, delimiter=self._delimiter, strict=True) yield from self._parse_iter(reader) except Exception as e: + logging.error(f"Failed to parse {filename}") logging.exception(e) def _parse_datetime(self, datetime_str: str) -> Optional[datetime]: @@ -54,36 +60,39 @@ def _parse_datetime(self, datetime_str: str) -> Optional[datetime]: def _parse_value(self, value_str: str) -> Optional[Decimal]: try: return Decimal(value_str) - except ValueError: + except InvalidOperation: return None def _parse_iter(self, reader: Iterator[list[str]]) -> Iterator[IOTRecord]: iot_records: list[IOTRecord] = [] for row in reader: - try: - parsed_datetime = self._parse_datetime(row[0]) - if parsed_datetime is None: - raise ValueError(f"Unrecognized datetime format: {row[0]}") + parsed_datetime = self._parse_datetime(row[0]) + if parsed_datetime is None: + logging.warning(f"Unrecognized datetime format: {row[0]}") + + parsed_value = self._parse_value(row[2]) + if parsed_value is None: + logging.warning(f"Unrecognized 
value format: {row[2]}") - parsed_value = self._parse_value(row[2]) - if parsed_value is None: - raise ValueError(f"Unrecognized value format: {row[2]}") + if parsed_datetime is None or parsed_value is None: + continue - yield IOTRecord( - datetime=parsed_datetime, - sensor_id=str(row[1]), - value=parsed_value, - ) - except Exception as e: - logging.exception(e) + yield IOTRecord( + record_time=parsed_datetime, + sensor_id=str(row[1]), + value=parsed_value, + ) return iot_records def _parse_single(self, filename: str) -> list[IOTRecord]: try: + if not filename.endswith(self._file_extension): + raise ValueError(f"File extension must be {self._file_extension}") with open(filename) as csvfile: reader = csv.reader(csvfile, delimiter=self._delimiter) return list(self._parse_iter(reader)) except Exception as e: + logging.error(f"Failed to parse {filename}") logging.exception(e) return [] diff --git a/consumer/src/deployments/script/config.py b/consumer/src/deployments/script/config.py index a7ed314..0ed6ae1 100644 --- a/consumer/src/deployments/script/config.py +++ b/consumer/src/deployments/script/config.py @@ -17,7 +17,7 @@ class RabbitMQConfig: PORT = int(os.getenv("RABBITMQ_PORT", 5672)) USERNAME = os.getenv("RABBITMQ_USERNAME", "guest") PASSWORD = os.getenv("RABBITMQ_PASSWORD", "guest") - QUEUE = os.getenv("RABBITMQ_QUEUE", "filenames") + QUEUE = os.getenv("RABBITMQ_QUEUE_NAME", "filenames") POLLING_TIMEOUT = int(os.getenv("RABBITMQ_POLLING_TIMEOUT", 10)) @@ -35,3 +35,4 @@ class CSVParserConfig: "CSV_PARSER_RECOGNIZED_DATETIME_FORMATS", "" ).split(",") DELIMITER = os.getenv("CSV_PARSER_DELIMITER", ",") + FILE_EXTENSION = os.getenv("CSV_PARSER_FILE_EXTENSION", ".csv") diff --git a/consumer/src/deployments/script/main.py b/consumer/src/deployments/script/main.py index 9a3306c..09d0022 100644 --- a/consumer/src/deployments/script/main.py +++ b/consumer/src/deployments/script/main.py @@ -20,6 +20,7 @@ file_parse_iot_records_client = CSVParseIOTRecordsClient( recognized_datetime_formats=CSVParserConfig.RECOGNIZED_DATETIME_FORMATS, delimiter=CSVParserConfig.DELIMITER, + file_extension=CSVParserConfig.FILE_EXTENSION, ) upsert_iot_records_client = PostgresUpsertIOTRecordsClient( diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_failed_fetch.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_failed_fetch.py index 4e8aec1..9cddf90 100644 --- a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_failed_fetch.py +++ b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_failed_fetch.py @@ -96,3 +96,53 @@ def mock_failed_fetch( assert "Failed to fetch!" 
in caplog.text assert sorted(all_filenames) == sorted(filenames) + + +@pytest.mark.parametrize("filename", random_csv_filenames()) +def test_fetch_single_ack_exception_resilience( + rabbitmq_fetch_filenames_client: RabbitMQFetchFilenamesClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + filename: str, + monkeypatch: MonkeyPatch, +): + new_rabbitmq_fetch_filenames_client = RabbitMQFetchFilenamesClient( + host=rabbitmq_fetch_filenames_client._host, + port=rabbitmq_fetch_filenames_client._port, + credentials_service=rabbitmq_fetch_filenames_client._credentials_service, + queue=rabbitmq_fetch_filenames_client._queue, + polling_timeout=1, + ) + + conn, queue = raw_rabbitmq_pika_conn_config + + channel = conn.channel() + channel.queue_declare( + queue=queue, + durable=True, + ) + + channel.basic_publish( + exchange="", + routing_key=rabbitmq_fetch_filenames_client._queue, + body=filename, + properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), + ) + + counter = 0 + + def mock_failed_ack( + self, + *args, + **kwargs, + ) -> None: + nonlocal counter + + if counter == 0: + counter += 1 + monkeypatch.undo() + raise Exception("Failed to ack!") + + monkeypatch.setattr(pika.channel.Channel, "basic_ack", mock_failed_ack) + + for fetched_filename in new_rabbitmq_fetch_filenames_client.fetch(): + assert fetched_filename == filename diff --git a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/conftest.py b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/conftest.py index e69de29..663ddf9 100644 --- a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/conftest.py +++ b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/conftest.py @@ -0,0 +1,87 @@ +from .utils import ( + random_csv_file, + random_tsv_file, + random_ndjson_file, + random_invalid_datetime_rows, + random_invalid_datetime_and_value_rows, + random_invalid_value_rows, + random_valid_format_rows, +) +import pytest +from pytest import TempdirFactory +from pathlib import Path +from src.adapters.file_parse_iot_records.csv import CSVParseIOTRecordsClient +from src.deployments.script.config import CSVParserConfig + + +@pytest.fixture(scope="session") +def setup_tempdir(tmpdir_factory: TempdirFactory) -> Path: + return Path(tmpdir_factory.mktemp("artifact")) + + +@pytest.fixture(scope="function") +def random_valid_csv_file(setup_tempdir: Path) -> Path: + return random_csv_file(setup_tempdir, random_valid_format_rows()) + + +@pytest.fixture(scope="function") +def random_invalid_datetime_and_value_csv_file(setup_tempdir: Path) -> Path: + return random_csv_file(setup_tempdir, random_invalid_datetime_and_value_rows()) + + +@pytest.fixture(scope="function") +def random_invalid_datetime_csv_file(setup_tempdir: Path) -> Path: + return random_csv_file(setup_tempdir, random_invalid_datetime_rows()) + + +@pytest.fixture(scope="function") +def random_invalid_value_csv_file(setup_tempdir: Path) -> Path: + return random_csv_file(setup_tempdir, random_invalid_value_rows()) + + +@pytest.fixture(scope="function") +def random_valid_tsv_file(setup_tempdir: Path) -> Path: + return random_tsv_file(setup_tempdir, random_valid_format_rows()) + + +@pytest.fixture(scope="function") +def random_invalid_datetime_and_value_tsv_file(setup_tempdir: Path) -> Path: + return random_tsv_file(setup_tempdir, random_invalid_datetime_and_value_rows()) + + +@pytest.fixture(scope="function") +def random_invalid_datetime_tsv_file(setup_tempdir: Path) -> Path: + return 
random_tsv_file(setup_tempdir, random_invalid_datetime_rows()) + + +@pytest.fixture(scope="function") +def random_invalid_value_tsv_file(setup_tempdir: Path) -> Path: + return random_tsv_file(setup_tempdir, random_invalid_value_rows()) + + +@pytest.fixture(scope="function") +def random_valid_ndjson_file(setup_tempdir: Path) -> Path: + return random_ndjson_file(setup_tempdir, random_valid_format_rows()) + + +@pytest.fixture(scope="function") +def random_invalid_datetime_and_value_ndjson_file(setup_tempdir: Path) -> Path: + return random_ndjson_file(setup_tempdir, random_invalid_datetime_and_value_rows()) + + +@pytest.fixture(scope="function") +def random_invalid_datetime_ndjson_file(setup_tempdir: Path) -> Path: + return random_ndjson_file(setup_tempdir, random_invalid_datetime_rows()) + + +@pytest.fixture(scope="function") +def random_invalid_value_ndjson_file(setup_tempdir: Path) -> Path: + return random_ndjson_file(setup_tempdir, random_invalid_value_rows()) + + +@pytest.fixture(scope="function") +def csv_parse_iot_records_client() -> CSVParseIOTRecordsClient: + return CSVParseIOTRecordsClient( + recognized_datetime_formats=CSVParserConfig.RECOGNIZED_DATETIME_FORMATS, + delimiter=CSVParserConfig.DELIMITER, + ) diff --git a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_close.py b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_close.py new file mode 100644 index 0000000..1dfa47f --- /dev/null +++ b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_close.py @@ -0,0 +1,26 @@ +import pytest +from src.adapters.file_parse_iot_records.csv import CSVParseIOTRecordsClient +from pytest import FixtureRequest + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "fixture_name", + [ + "random_valid_csv_file", + "random_invalid_datetime_and_value_csv_file", + "random_invalid_datetime_csv_file", + "random_invalid_value_csv_file", + ] + * 5, +) +def test_close_always_successful( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_name: str, + request: FixtureRequest, +): + random_valid_csv_file: str = request.getfixturevalue(fixture_name) + + csv_parse_iot_records_client.parse(random_valid_csv_file) + + assert csv_parse_iot_records_client.close() diff --git a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_open_file.py b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_open_file.py new file mode 100644 index 0000000..ee1a637 --- /dev/null +++ b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_open_file.py @@ -0,0 +1,110 @@ +import pytest +from src.adapters.file_parse_iot_records.csv import CSVParseIOTRecordsClient +from pytest import FixtureRequest, MonkeyPatch, LogCaptureFixture +from src.entities import IOTRecord + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "fixture_name", + [ + "random_valid_csv_file", + "random_invalid_datetime_and_value_csv_file", + "random_invalid_datetime_csv_file", + "random_invalid_value_csv_file", + ] + * 5, +) +def test_parse_single_failed_open_file( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_name: str, + request: FixtureRequest, + caplog: LogCaptureFixture, + monkeypatch: MonkeyPatch, +): + random_csv_file: str = request.getfixturevalue(fixture_name) + + def mock_open(*args, **kwargs): + raise FileNotFoundError("Failed to open file!") + + monkeypatch.setattr("builtins.open", mock_open) + + with caplog.at_level("ERROR"): + iot_records = 
csv_parse_iot_records_client.parse(random_csv_file) + assert len(iot_records) == 0 + assert f"Failed to parse {random_csv_file}" in caplog.text + assert "Failed to open file!" in caplog.text + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "fixture_name", + [ + "random_valid_csv_file", + "random_invalid_datetime_and_value_csv_file", + "random_invalid_datetime_csv_file", + "random_invalid_value_csv_file", + ] + * 5, +) +def test_parse_stream_failed_open_file( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_name: str, + request: FixtureRequest, + caplog: LogCaptureFixture, + monkeypatch: MonkeyPatch, +): + random_csv_file: str = request.getfixturevalue(fixture_name) + + def mock_open(*args, **kwargs): + raise FileNotFoundError("Failed to open file!") + + monkeypatch.setattr("builtins.open", mock_open) + + all_iot_records: list[IOTRecord] = [] + with caplog.at_level("ERROR"): + for iot_record in csv_parse_iot_records_client.parse_stream(random_csv_file): + assert isinstance(iot_record, IOTRecord) + all_iot_records.append(iot_record) + assert len(all_iot_records) == 0 + assert f"Failed to parse {random_csv_file}" in caplog.text + assert "Failed to open file!" in caplog.text + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "fixture_names", + [ + tuple( + [ + "random_valid_csv_file", + "random_invalid_datetime_and_value_csv_file", + "random_invalid_datetime_csv_file", + "random_invalid_value_csv_file", + ] + ) + for _ in range(5) + ], +) +def test_parse_batch_failed_open_file( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_names: tuple[str, ...], + request: FixtureRequest, + caplog: LogCaptureFixture, + monkeypatch: MonkeyPatch, +): + random_csv_files: list[str] = [ + request.getfixturevalue(fixture_name) for fixture_name in fixture_names + ] + + def mock_open(*args, **kwargs): + raise FileNotFoundError("Failed to open file!") + + monkeypatch.setattr("builtins.open", mock_open) + + with caplog.at_level("ERROR"): + iot_records = csv_parse_iot_records_client.parse(random_csv_files) + for random_csv_file, iot_record in zip(random_csv_files, iot_records): + assert len(iot_record) == 0 + assert f"Failed to parse {random_csv_file}" in caplog.text + assert "Failed to open file!" 
in caplog.text diff --git a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_other_file_formats.py b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_other_file_formats.py new file mode 100644 index 0000000..9f1188d --- /dev/null +++ b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_other_file_formats.py @@ -0,0 +1,113 @@ +import pytest +from src.adapters.file_parse_iot_records.csv import CSVParseIOTRecordsClient +from pytest import FixtureRequest, LogCaptureFixture +from src.entities import IOTRecord + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "fixture_name", + [ + "random_valid_tsv_file", + "random_invalid_datetime_and_value_tsv_file", + "random_invalid_datetime_tsv_file", + "random_invalid_value_tsv_file", + "random_valid_ndjson_file", + "random_invalid_datetime_and_value_ndjson_file", + "random_invalid_datetime_ndjson_file", + "random_invalid_value_ndjson_file", + ] + * 5, +) +def test_parse_single_other_format_failed( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_name: str, + request: FixtureRequest, + caplog: LogCaptureFixture, +): + random_file: str = request.getfixturevalue(fixture_name) + + with caplog.at_level("ERROR"): + iot_records = csv_parse_iot_records_client.parse(random_file) + assert len(iot_records) == 0 + assert f"Failed to parse {random_file}" in caplog.text + assert ( + f"File extension must be {csv_parse_iot_records_client._file_extension}" + in caplog.text + ) + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "fixture_name", + [ + "random_valid_tsv_file", + "random_invalid_datetime_and_value_tsv_file", + "random_invalid_datetime_tsv_file", + "random_invalid_value_tsv_file", + "random_valid_ndjson_file", + "random_invalid_datetime_and_value_ndjson_file", + "random_invalid_datetime_ndjson_file", + "random_invalid_value_ndjson_file", + ] + * 5, +) +def test_parse_stream_other_format_failed( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_name: str, + request: FixtureRequest, + caplog: LogCaptureFixture, +): + random_file: str = request.getfixturevalue(fixture_name) + + all_iot_records: list[IOTRecord] = [] + with caplog.at_level("ERROR"): + for iot_record in csv_parse_iot_records_client.parse_stream(random_file): + assert isinstance(iot_record, IOTRecord) + all_iot_records.append(iot_record) + assert len(all_iot_records) == 0 + assert f"Failed to parse {random_file}" in caplog.text + assert ( + f"File extension must be {csv_parse_iot_records_client._file_extension}" + in caplog.text + ) + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "fixture_names", + [ + tuple( + [ + "random_valid_tsv_file", + "random_invalid_datetime_and_value_tsv_file", + "random_invalid_datetime_tsv_file", + "random_invalid_value_tsv_file", + "random_valid_ndjson_file", + "random_invalid_datetime_and_value_ndjson_file", + "random_invalid_datetime_ndjson_file", + "random_invalid_value_ndjson_file", + ] + ) + for _ in range(5) + ], +) +def test_parse_batch_other_format_failed( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_names: tuple[str, ...], + request: FixtureRequest, + caplog: LogCaptureFixture, +): + random_files: list[str] = [ + request.getfixturevalue(fixture_name) for fixture_name in fixture_names + ] + + with caplog.at_level("ERROR"): + iot_records = csv_parse_iot_records_client.parse(random_files) + for random_file, iot_record in zip(random_files, iot_records): + assert len(iot_record) == 0 + assert f"Failed to parse 
{random_file}" in caplog.text + assert ( + f"File extension must be {csv_parse_iot_records_client._file_extension}" + in caplog.text + ) diff --git a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_parse.py b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_parse.py new file mode 100644 index 0000000..a926c33 --- /dev/null +++ b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_parse.py @@ -0,0 +1,223 @@ +import pytest +from src.adapters.file_parse_iot_records.csv import CSVParseIOTRecordsClient +from src.entities import IOTRecord +from pytest import FixtureRequest, LogCaptureFixture + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "fixture_name", + ["random_invalid_value_csv_file"] * 5, +) +def test_parse_single_decimal_failed_ignore_row( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_name: str, + request: FixtureRequest, + caplog: LogCaptureFixture, +): + random_invalid_value_csv_file: str = request.getfixturevalue(fixture_name) + + with caplog.at_level("WARNING"): + iot_records = csv_parse_iot_records_client.parse(random_invalid_value_csv_file) + assert len(iot_records) == 0 + assert "Unrecognized value format:" in caplog.text + assert "Unrecognized datetime format:" not in caplog.text + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "fixture_name", + ["random_invalid_datetime_csv_file"] * 5, +) +def test_parse_single_datetime_failed_ignore_row( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_name: str, + request: FixtureRequest, + caplog: LogCaptureFixture, +): + random_invalid_datetime_csv_file: str = request.getfixturevalue(fixture_name) + + with caplog.at_level("WARNING"): + iot_records = csv_parse_iot_records_client.parse( + random_invalid_datetime_csv_file + ) + assert len(iot_records) == 0 + assert "Unrecognized datetime format:" in caplog.text + assert "Unrecognized value format:" not in caplog.text + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "fixture_name", + ["random_invalid_datetime_and_value_csv_file"] * 5, +) +def test_parse_single_datetime_and_value_failed_ignore_row( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_name: str, + request: FixtureRequest, + caplog: LogCaptureFixture, +): + random_invalid_datetime_and_value_csv_file: str = request.getfixturevalue( + fixture_name + ) + + with caplog.at_level("WARNING"): + iot_records = csv_parse_iot_records_client.parse( + random_invalid_datetime_and_value_csv_file + ) + assert len(iot_records) == 0 + assert "Unrecognized datetime format:" in caplog.text + assert "Unrecognized value format:" in caplog.text + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "fixture_name", + ["random_invalid_value_csv_file"] * 5, +) +def test_parse_stream_decimal_failed_ignore_row( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_name: str, + request: FixtureRequest, + caplog: LogCaptureFixture, +): + random_invalid_value_csv_file: str = request.getfixturevalue(fixture_name) + + all_iot_records: list[IOTRecord] = [] + with caplog.at_level("WARNING"): + for iot_records in csv_parse_iot_records_client.parse_stream( + random_invalid_value_csv_file + ): + all_iot_records.append(iot_records) + assert len(all_iot_records) == 0 + assert "Unrecognized value format:" in caplog.text + assert "Unrecognized datetime format:" not in caplog.text + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "fixture_name", + ["random_invalid_datetime_csv_file"] * 5, +) +def 
test_parse_stream_datetime_failed_ignore_row( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_name: str, + request: FixtureRequest, + caplog: LogCaptureFixture, +): + random_invalid_datetime_csv_file: str = request.getfixturevalue(fixture_name) + + all_iot_records: list[IOTRecord] = [] + with caplog.at_level("WARNING"): + for iot_records in csv_parse_iot_records_client.parse_stream( + random_invalid_datetime_csv_file + ): + all_iot_records.append(iot_records) + assert len(all_iot_records) == 0 + assert "Unrecognized datetime format:" in caplog.text + assert "Unrecognized value format:" not in caplog.text + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "fixture_name", + ["random_invalid_datetime_and_value_csv_file"] * 5, +) +def test_parse_stream_datetime_and_value_failed_ignore_row( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_name: str, + request: FixtureRequest, + caplog: LogCaptureFixture, +): + random_invalid_datetime_and_value_csv_file: str = request.getfixturevalue( + fixture_name + ) + + all_iot_records: list[IOTRecord] = [] + with caplog.at_level("WARNING"): + for iot_records in csv_parse_iot_records_client.parse_stream( + random_invalid_datetime_and_value_csv_file + ): + all_iot_records.append(iot_records) + assert len(all_iot_records) == 0 + assert "Unrecognized datetime format:" in caplog.text + assert "Unrecognized value format:" in caplog.text + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "fixture_names", + [tuple(["random_invalid_value_csv_file"] * 5) for _ in range(5)], +) +def test_parse_batch_decimal_failed_ignore_row( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_names: tuple[str, ...], + request: FixtureRequest, + caplog: LogCaptureFixture, +): + random_invalid_value_csv_files: list[str] = [ + request.getfixturevalue(fixture_name) for fixture_name in fixture_names + ] + + with caplog.at_level("WARNING"): + iot_records = csv_parse_iot_records_client.parse(random_invalid_value_csv_files) + + for iot_record in iot_records: + assert len(iot_record) == 0 + + assert "Unrecognized value format:" in caplog.text + assert "Unrecognized datetime format:" not in caplog.text + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "fixture_names", + [tuple(["random_invalid_datetime_csv_file"] * 5) for _ in range(5)], +) +def test_parse_batch_datetime_failed_ignore_row( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_names: tuple[str, ...], + request: FixtureRequest, + caplog: LogCaptureFixture, +): + random_invalid_datetime_csv_files: list[str] = [ + request.getfixturevalue(fixture_name) for fixture_name in fixture_names + ] + + with caplog.at_level("WARNING"): + iot_records = csv_parse_iot_records_client.parse( + random_invalid_datetime_csv_files + ) + + for iot_record in iot_records: + assert len(iot_record) == 0 + + assert "Unrecognized datetime format:" in caplog.text + assert "Unrecognized value format:" not in caplog.text + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "fixture_names", + [tuple(["random_invalid_datetime_and_value_csv_file"] * 5) for _ in range(5)], +) +def test_parse_batch_datetime_and_value_failed_ignore_row( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_names: tuple[str, ...], + request: FixtureRequest, + caplog: LogCaptureFixture, +): + random_invalid_datetime_and_value_csv_files: list[str] = [ + request.getfixturevalue(fixture_name) for fixture_name in fixture_names + ] + + with caplog.at_level("WARNING"): + iot_records = 
csv_parse_iot_records_client.parse( + random_invalid_datetime_and_value_csv_files + ) + + for iot_record in iot_records: + assert len(iot_record) == 0 + + assert "Unrecognized datetime format:" in caplog.text + assert "Unrecognized value format:" in caplog.text diff --git a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_successful_parse.py b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_successful_parse.py new file mode 100644 index 0000000..9345a7b --- /dev/null +++ b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_successful_parse.py @@ -0,0 +1,61 @@ +import pytest +from src.adapters.file_parse_iot_records.csv import CSVParseIOTRecordsClient +from src.entities import IOTRecord +from pytest import FixtureRequest + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "fixture_name", + ["random_valid_csv_file"] * 5, +) +def test_parse_single_successful( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_name: str, + request: FixtureRequest, +): + random_valid_csv_file: str = request.getfixturevalue(fixture_name) + iot_records = csv_parse_iot_records_client.parse(random_valid_csv_file) + assert len(iot_records) > 0 + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "fixture_names", + [tuple(["random_valid_csv_file" for _ in range(10)]) for _ in range(5)], +) +def test_parse_batch_successful( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_names: tuple[str, ...], + request: FixtureRequest, +): + random_valid_csv_files: list[str] = [ + request.getfixturevalue(fixture_name) for fixture_name in fixture_names + ] + iot_records = csv_parse_iot_records_client.parse(random_valid_csv_files) + + for iot_record in iot_records: + assert len(iot_record) > 0 + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "fixture_name", + ["random_valid_csv_file"] * 5, +) +def test_parse_stream_successful( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_name: str, + request: FixtureRequest, +): + random_valid_csv_file: str = request.getfixturevalue(fixture_name) + all_iot_records: list[IOTRecord] = [] + for iot_record in csv_parse_iot_records_client.parse_stream(random_valid_csv_file): + assert isinstance(iot_record, IOTRecord) + all_iot_records.append(iot_record) + + iot_records = csv_parse_iot_records_client.parse(random_valid_csv_file) + + assert sorted( + all_iot_records, key=lambda iot_record: iot_record.record_time + ) == sorted(iot_records, key=lambda iot_record: iot_record.record_time) diff --git a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/utils.py b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/utils.py new file mode 100644 index 0000000..fa5b48d --- /dev/null +++ b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/utils.py @@ -0,0 +1,136 @@ +from decimal import Decimal +import random +import string +import csv +from pathlib import Path +from datetime import datetime, timedelta +from zoneinfo import ZoneInfo +import zoneinfo +import random +import json + + +def random_valid_format_rows() -> list[tuple[str, ...]]: + rows = [] + for _ in range(10): + random_timezone = random.choice(list(zoneinfo.available_timezones())) + random_time_delta = timedelta( + hours=random.randint(0, 24), + minutes=random.randint(0, 60), + seconds=random.randint(0, 60), + ) + random_datetime = datetime.now(tz=ZoneInfo(random_timezone)) - random_time_delta + random_sensor_id = "".join(random.choices(string.ascii_letters, k=10)) + random_value = 
Decimal(random.random() * 100) + rows.append((random_datetime.isoformat(), random_sensor_id, str(random_value))) + return rows + + +def random_invalid_datetime_rows() -> list[tuple[str, ...]]: + rows = [] + all_datetime_formats = [ + "%Y-%m-%dT%H:%M:%S%z", + "%Y-%m-%dT%H:%M%z", + "%Y-%m-%d %H:%M:%S%z", + "%Y-%m-%d %H:%M%z", + ] + for _ in range(10): + random_timezone = random.choice(list(zoneinfo.available_timezones())) + random_time_delta = timedelta( + hours=random.randint(0, 24), + minutes=random.randint(0, 60), + seconds=random.randint(0, 60), + ) + random_datetime = datetime.now(tz=ZoneInfo(random_timezone)) - random_time_delta + random_sensor_id = "".join(random.choices(string.ascii_letters, k=10)) + random_value = Decimal(random.random() * 100) + random_datetime_format = random.choice(all_datetime_formats) + rows.append( + ( + random_datetime.strftime(random_datetime_format), + random_sensor_id, + str(random_value), + ) + ) + return rows + + +def random_invalid_value_rows() -> list[tuple[str, ...]]: + rows = [] + for _ in range(10): + random_timezone = random.choice(list(zoneinfo.available_timezones())) + random_time_delta = timedelta( + hours=random.randint(0, 24), + minutes=random.randint(0, 60), + seconds=random.randint(0, 60), + ) + random_datetime = datetime.now(tz=ZoneInfo(random_timezone)) - random_time_delta + random_sensor_id = "".join(random.choices(string.ascii_letters, k=10)) + random_value = "".join(random.choices(string.ascii_letters, k=10)) + rows.append( + ( + random_datetime.isoformat(), + random_sensor_id, + random_value, + ) + ) + return rows + + +def random_invalid_datetime_and_value_rows() -> list[tuple[str, ...]]: + rows = [] + all_datetime_formats = [ + "%Y-%m-%dT%H:%M:%S.%f%z", + "%Y-%m-%dT%H:%M:%S%z", + "%Y-%m-%dT%H:%M%z", + "%Y-%m-%d %H:%M:%S.%f%z", + "%Y-%m-%d %H:%M:%S%z", + "%Y-%m-%d %H:%M%z", + ] + for _ in range(10): + random_timezone = random.choice(list(zoneinfo.available_timezones())) + random_time_delta = timedelta( + hours=random.randint(0, 24), + minutes=random.randint(0, 60), + seconds=random.randint(0, 60), + ) + random_datetime = datetime.now(tz=ZoneInfo(random_timezone)) - random_time_delta + random_sensor_id = "".join(random.choices(string.ascii_letters, k=10)) + random_value = "".join(random.choices(string.ascii_letters, k=10)) + random_datetime_format = random.choice(all_datetime_formats) + rows.append( + ( + random_datetime.strftime(random_datetime_format), + random_sensor_id, + str(random_value), + ) + ) + return rows + + +def random_csv_file(base_dir: Path, rows: list[tuple[str, ...]]) -> str: + filename = "".join(random.choices(string.ascii_letters, k=10)) + ".csv" + filepath = base_dir.joinpath(filename) + with open(filepath, "w") as csvfile: + writer = csv.writer(csvfile, delimiter=",") + writer.writerows(rows) + return str(filepath) + + +def random_tsv_file(base_dir: Path, rows: list[tuple[str, ...]]) -> str: + filename = "".join(random.choices(string.ascii_letters, k=10)) + ".tsv" + filepath = base_dir.joinpath(filename) + with open(filepath, "w") as csvfile: + writer = csv.writer(csvfile, delimiter="\t") + writer.writerows(rows) + return str(filepath) + + +def random_ndjson_file(base_dir: Path, rows: list[tuple[str, ...]]) -> str: + filename = "".join(random.choices(string.ascii_letters, k=10)) + ".ndjson" + filepath = base_dir.joinpath(filename) + with open(filepath, "w") as csvfile: + for row in rows: + json.dump(row, csvfile) + csvfile.write("\n") + return str(filepath) diff --git 
a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/conftest.py b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/conftest.py index 4131138..f8db06d 100644 --- a/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/conftest.py +++ b/consumer/tests/test_adapters/test_upsert_iot_records/test_postgres/conftest.py @@ -18,13 +18,13 @@ def init_postgres_tables() -> None: """ CREATE TABLE IF NOT EXISTS records ( record_time TIMESTAMPTZ NOT NULL, - sensor_id CHAR(64) NOT NULL, + sensor_id TEXT NOT NULL, value DOUBLE PRECISION NOT NULL, PRIMARY KEY(record_time, sensor_id) ); - CREATE INDEX IF NOT EXISTS idx_records_record_time ON records USING BTREE (record_time); - CREATE INDEX IF NOT EXISTS idx_records_sensor_id ON records USING BTREE (sensor_id); + CREATE INDEX IF NOT EXISTS idx_records_record_time ON records USING BRIN (record_time); + CREATE INDEX IF NOT EXISTS idx_records_sensor_id ON records USING HASH (sensor_id); """ ) conn.commit() diff --git a/database/assets/create_records_table.sql b/database/assets/create_records_table.sql index 901480f..43d0d1e 100644 --- a/database/assets/create_records_table.sql +++ b/database/assets/create_records_table.sql @@ -1,9 +1,9 @@ CREATE TABLE IF NOT EXISTS records ( record_time TIMESTAMPTZ NOT NULL, - sensor_id CHAR(64) NOT NULL, + sensor_id TEXT NOT NULL, value DOUBLE PRECISION NOT NULL, PRIMARY KEY(record_time, sensor_id) ); -CREATE INDEX IF NOT EXISTS idx_records_record_time ON records USING BTREE (record_time); -CREATE INDEX IF NOT EXISTS idx_records_sensor_id ON records USING BTREE (sensor_id); \ No newline at end of file +CREATE INDEX IF NOT EXISTS idx_records_record_time ON records USING BRIN (record_time); +CREATE INDEX IF NOT EXISTS idx_records_sensor_id ON records USING HASH (sensor_id); diff --git a/docker-compose.yml b/docker-compose.yml index a91bc37..fd4a64f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -74,3 +74,4 @@ services: POSTGRES_BATCH_UPSERT_SIZE: ${POSTGRES_BATCH_UPSERT_SIZE} CSV_PARSER_RECOGNIZED_DATETIME_FORMATS: ${CSV_PARSER_RECOGNIZED_DATETIME_FORMATS} CSV_PARSER_DELIMITER: ${CSV_PARSER_DELIMITER} + CSV_PARSER_FILE_EXTENSION: ${CSV_PARSER_FILE_EXTENSION} diff --git a/producer/src/deployments/script/config.py b/producer/src/deployments/script/config.py index 76fd068..3fdd49b 100644 --- a/producer/src/deployments/script/config.py +++ b/producer/src/deployments/script/config.py @@ -1,21 +1,25 @@ import os + class ProjectConfig: - TARGET_FILE_DIR = os.getenv('TARGET_FILE_DIR', '/tmp') - TARGET_FILE_EXTENSION = os.getenv('TARGET_FILE_EXTENSION', '.csv') + TARGET_FILE_DIR = os.getenv("TARGET_FILE_DIR", "/tmp") + TARGET_FILE_EXTENSION = os.getenv("TARGET_FILE_EXTENSION", ".csv") + class LoggingConfig: - LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO') - LOG_FORMAT = os.getenv('LOG_FORMAT', '%(asctime)s - %(name)s - %(levelname)s - %(message)s') - LOG_DATE_FORMAT = os.getenv('LOG_DATE_FORMAT', '%Y-%m-%d %H:%M:%S') - LOG_DIR = os.getenv('LOG_DIR', '/tmp') - LOG_RETENTION = os.getenv('LOG_RETENTION', '7') - LOG_ROTATION = os.getenv('LOG_ROTATION', 'midnight') - + LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO") + LOG_FORMAT = os.getenv( + "LOG_FORMAT", "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) + LOG_DATE_FORMAT = os.getenv("LOG_DATE_FORMAT", "%Y-%m-%d %H:%M:%S") + LOG_DIR = os.getenv("LOG_DIR", "/tmp") + LOG_RETENTION = os.getenv("LOG_RETENTION", "7") + LOG_ROTATION = os.getenv("LOG_ROTATION", "midnight") + + class RabbitMQConfig: - HOST = os.getenv('RABBITMQ_HOST', 
'localhost') - PORT = int(os.getenv('RABBITMQ_PORT', 5672)) - USERNAME = os.getenv('RABBITMQ_USERNAME', 'guest') - PASSWORD = os.getenv('RABBITMQ_PASSWORD', 'guest') - QUEUE = os.getenv('RABBITMQ_QUEUE', 'filenames') - \ No newline at end of file + HOST = os.getenv("RABBITMQ_HOST", "localhost") + PORT = int(os.getenv("RABBITMQ_PORT", 5672)) + USERNAME = os.getenv("RABBITMQ_USERNAME", "guest") + PASSWORD = os.getenv("RABBITMQ_PASSWORD", "guest") + QUEUE = os.getenv("RABBITMQ_QUEUE_NAME", "filenames") diff --git a/producer/src/usecases/publish_filenames.py b/producer/src/usecases/publish_filenames.py index 59dac13..50d4954 100644 --- a/producer/src/usecases/publish_filenames.py +++ b/producer/src/usecases/publish_filenames.py @@ -4,7 +4,7 @@ class PublishFilenamesClient(ABC): @overload - def publish(self, filename: str) -> bool: # type: ignore[overload-overlap] + def publish(self, filename: str) -> bool: pass @overload From 7964eeb91abd959d788f6c0ab1da31e207d565fb Mon Sep 17 00:00:00 2001 From: alexau Date: Sun, 3 Dec 2023 18:47:48 +0800 Subject: [PATCH 32/36] Updated the main test --- .env | 1 + Makefile | 2 +- .../src/adapters/fetch_filenames/rabbitmq.py | 94 ------ .../__init__.py | 0 .../fetch_filenames_stream/rabbitmq.py | 165 +++++++++++ .../adapters/file_parse_iot_records/csv.py | 31 +- consumer/src/deployments/script/main.py | 99 ++++--- .../src/deployments/script/setup_logging.py | 2 +- consumer/src/usecases/__init__.py | 2 +- consumer/src/usecases/fetch_filenames.py | 12 - .../src/usecases/fetch_filenames_stream.py | 38 +++ .../src/usecases/file_parse_iot_records.py | 8 +- .../test_rabbitmq/conftest.py | 15 +- .../test_rabbitmq/test_ack_failed.py | 41 +++ .../test_ack_remove_data_in_stream.py | 80 +++++ .../test_rabbitmq/test_close_conn_failed.py | 20 +- .../test_close_conn_successful.py | 16 +- .../test_rabbitmq/test_failed_conn.py | 48 +-- .../test_rabbitmq/test_failed_fetch.py | 43 ++- .../test_rabbitmq/test_poll_until_timeout.py | 22 +- .../test_rabbitmq/test_reject_failed.py | 41 +++ .../test_reject_retain_data_in_stream.py | 94 ++++++ .../test_rabbitmq/test_successful_fetch.py | 24 +- .../test_csv/conftest.py | 1 + .../test_failed_file_error_return_none.py | 68 +++++ .../test_csv/test_failed_file_not_exists.py | 82 +++++ .../test_failed_file_stream_error_raise.py | 35 +++ .../test_csv/test_failed_open_file.py | 110 ------- .../test_failed_other_file_formats.py | 4 +- .../test_csv/test_failed_parse_dir.py | 53 ++++ .../test_csv/utils.py | 2 - consumer/tests/test_deployments/__init__.py | 0 .../test_deployments/test_script/__init__.py | 0 .../test_script/test_main/__init__.py | 0 .../test_script/test_main/conftest.py | 170 +++++++++++ .../test_main_failed_read_stream_raise.py | 48 +++ .../test_main_read_file_resilience.py | 87 ++++++ .../test_main/test_main_successful.py | 279 ++++++++++++++++++ .../test_main_upsert_record_resilience.py | 168 +++++++++++ .../test_script/test_main/utils.py | 134 +++++++++ docker-compose.yml | 3 + test_generator.py | 7 + 42 files changed, 1811 insertions(+), 338 deletions(-) delete mode 100644 consumer/src/adapters/fetch_filenames/rabbitmq.py rename consumer/src/adapters/{fetch_filenames => fetch_filenames_stream}/__init__.py (100%) create mode 100644 consumer/src/adapters/fetch_filenames_stream/rabbitmq.py delete mode 100644 consumer/src/usecases/fetch_filenames.py create mode 100644 consumer/src/usecases/fetch_filenames_stream.py create mode 100644 consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_ack_failed.py create mode 
100644 consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_ack_remove_data_in_stream.py create mode 100644 consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_reject_failed.py create mode 100644 consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_reject_retain_data_in_stream.py create mode 100644 consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_file_error_return_none.py create mode 100644 consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_file_not_exists.py create mode 100644 consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_file_stream_error_raise.py delete mode 100644 consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_open_file.py create mode 100644 consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_parse_dir.py create mode 100644 consumer/tests/test_deployments/__init__.py create mode 100644 consumer/tests/test_deployments/test_script/__init__.py create mode 100644 consumer/tests/test_deployments/test_script/test_main/__init__.py create mode 100644 consumer/tests/test_deployments/test_script/test_main/conftest.py create mode 100644 consumer/tests/test_deployments/test_script/test_main/test_main_failed_read_stream_raise.py create mode 100644 consumer/tests/test_deployments/test_script/test_main/test_main_read_file_resilience.py create mode 100644 consumer/tests/test_deployments/test_script/test_main/test_main_successful.py create mode 100644 consumer/tests/test_deployments/test_script/test_main/test_main_upsert_record_resilience.py create mode 100644 consumer/tests/test_deployments/test_script/test_main/utils.py create mode 100644 test_generator.py diff --git a/.env b/.env index 750e8fd..ee2c7c9 100644 --- a/.env +++ b/.env @@ -32,6 +32,7 @@ CONSUMER_LOG_DATE_FORMAT="%Y-%m-%d %H:%M:%S" CONSUMER_LOG_DIR=./logs/producer CONSUMER_LOG_RETENTION=7 CONSUMER_LOG_ROTATION=midnight +CONSUMER_REPLICAS=16 CSV_PARSER_RECOGNIZED_DATETIME_FORMATS="%Y-%m-%dT%H:%M:%S.%f%z" CSV_PARSER_DELIMITER="," diff --git a/Makefile b/Makefile index 86d53ca..78c985f 100644 --- a/Makefile +++ b/Makefile @@ -45,7 +45,7 @@ test_consumer: export QUEUE_NAME=$(QUEUE_NAME) && \ export CSV_PARSER_RECOGNIZED_DATETIME_FORMATS=$(CSV_PARSER_RECOGNIZED_DATETIME_FORMATS) && \ export CSV_PARSER_DELIMITER=$(CSV_PARSER_DELIMITER) && \ - COVERAGE_FILE=.coverage_consumer coverage run -m pytest -vxs consumer/tests + COVERAGE_FILE=.coverage_consumer coverage run -m pytest -vx --last-failed consumer/tests coverage_report: coverage combine .coverage_producer .coverage_consumer && \ coverage report -m --omit="*/tests/*" diff --git a/consumer/src/adapters/fetch_filenames/rabbitmq.py b/consumer/src/adapters/fetch_filenames/rabbitmq.py deleted file mode 100644 index fe858bc..0000000 --- a/consumer/src/adapters/fetch_filenames/rabbitmq.py +++ /dev/null @@ -1,94 +0,0 @@ -from contextlib import contextmanager -from datetime import datetime -import time -from ...usecases import FetchFilenameClient -import pika -from pika.adapters.blocking_connection import BlockingChannel -from pika.spec import Basic, BasicProperties -from pika.connection import Connection -from typing import Generator, Iterator, Optional -from typing_extensions import override -from collections.abc import Callable -import logging - - -class RabbitMQFetchFilenamesClient(FetchFilenameClient): - def __init__( - self, - host: str, - port: int, - credentials_service: Callable[[], tuple[str, str]], - 
queue: str = "filenames", - polling_timeout: int = 10, - ) -> None: - self._host = host - self._port = port - self._credentials_service = credentials_service - self._queue = queue - self._conn: Optional[Connection] = None - self._polling_timeout = polling_timeout - self._last_poll_time: Optional[datetime] = None - - def _reset_conn(self) -> None: - self._conn = None - - @contextmanager - def _get_amqp_conn(self) -> Iterator[pika.BaseConnection]: - if self._conn is None or self._conn.is_closed: - username, password = self._credentials_service() - credentials = pika.PlainCredentials(username, password) - conn_parameters = pika.ConnectionParameters( - host=self._host, - port=self._port, - credentials=credentials, - ) - self._conn = pika.BlockingConnection(conn_parameters) - yield self._conn - - def _wait(self) -> None: - time.sleep(0.5) - - @override - def fetch(self) -> Generator[str, None, None]: - while True: - try: - method: Optional[Basic.Deliver] = None - with self._get_amqp_conn() as connection: - channel: BlockingChannel = connection.channel() - channel.queue_declare(queue=self._queue, durable=True) - properties: Optional[BasicProperties] - body: Optional[bytes] - - method, properties, body = channel.basic_get( - queue=self._queue, auto_ack=False - ) - if method is None and properties is None and body is None: - if self._last_poll_time is None: - self._last_poll_time = datetime.now() - if ( - datetime.now() - self._last_poll_time - ).total_seconds() > self._polling_timeout: - break - self._wait() - continue - - self._last_poll_time = None - - yield body.decode() - - channel.basic_ack(delivery_tag=method.delivery_tag) - except Exception as e: - logging.exception(e) - if method is not None: - channel.basic_reject(delivery_tag=method.delivery_tag, requeue=True) - self._reset_conn() - - @override - def close(self) -> bool: - try: - if self._conn is not None: - self._conn.close() - return True - except Exception as e: - logging.exception(e) - return False diff --git a/consumer/src/adapters/fetch_filenames/__init__.py b/consumer/src/adapters/fetch_filenames_stream/__init__.py similarity index 100% rename from consumer/src/adapters/fetch_filenames/__init__.py rename to consumer/src/adapters/fetch_filenames_stream/__init__.py diff --git a/consumer/src/adapters/fetch_filenames_stream/rabbitmq.py b/consumer/src/adapters/fetch_filenames_stream/rabbitmq.py new file mode 100644 index 0000000..bb27e75 --- /dev/null +++ b/consumer/src/adapters/fetch_filenames_stream/rabbitmq.py @@ -0,0 +1,165 @@ +from concurrent.futures import ThreadPoolExecutor +from contextlib import contextmanager +from datetime import datetime +import time +from ...usecases import FetchFilenameStreamClient +import pika +from pika.adapters.blocking_connection import BlockingChannel +from pika.spec import Basic, BasicProperties +from pika.connection import Connection +from typing import Generator, Iterator, Optional, Sequence, cast, overload +from typing_extensions import override +from collections.abc import Callable +import logging + + +class RabbitMQFetchFilenameStreamClient(FetchFilenameStreamClient[int]): + def __init__( + self, + host: str, + port: int, + credentials_service: Callable[[], tuple[str, str]], + queue: str = "filenames", + polling_timeout: int = 10, + ) -> None: + self._host = host + self._port = port + self._credentials_service = credentials_service + self._queue = queue + self._conn: Optional[Connection] = None + self._channel: Optional[BlockingChannel] = None + self._polling_timeout = polling_timeout + 
self._last_poll_time: Optional[datetime] = None + + @overload + def ack(self, message_receipt: int) -> bool: + ... + + @overload + def ack(self, message_receipt: Sequence[int]) -> list[bool]: + ... + + @override + def ack(self, message_receipt: int | Sequence[int]) -> bool | list[bool]: + if isinstance(message_receipt, int): + return self._ack_single(message_receipt) + return self._ack_batch(message_receipt) + + def _ack_single(self, message_receipt: int) -> bool: + try: + with self._get_channel() as channel: + channel.basic_ack(delivery_tag=message_receipt, multiple=False) + return True + except Exception as e: + logging.exception(e) + return False + + def _ack_batch(self, message_receipts: Sequence[int]) -> list[bool]: + #! RabbitMQ is not thread-safe, so we have to use a single thread to ack + results: list[bool] = [] + for receipt in message_receipts: + results.append(self._ack_single(receipt)) + return results + + @overload + def reject(self, message_receipt: int) -> bool: + ... + + @overload + def reject(self, message_receipt: Sequence[int]) -> list[bool]: + ... + + @override + def reject(self, message_receipt: int | Sequence[int]) -> bool | list[bool]: + if isinstance(message_receipt, int): + return self._reject_single(message_receipt) + return self._reject_batch(message_receipt) + + def _reject_single(self, message_receipt: int) -> bool: + try: + with self._get_channel() as channel: + channel.basic_nack(delivery_tag=message_receipt, requeue=True) + return True + except Exception as e: + logging.exception(e) + return False + + def _reject_batch(self, message_receipts: Sequence[int]) -> list[bool]: + #! RabbitMQ is not thread-safe, so we have to use a single thread to ack + results: list[bool] = [] + for receipt in message_receipts: + results.append(self._reject_single(receipt)) + return results + + def _reset_conn(self) -> None: + self._conn = None + self._channel = None + + @contextmanager + def _get_amqp_conn(self) -> Iterator[Connection]: + if self._conn is None or self._conn.is_closed: + username, password = self._credentials_service() + credentials = pika.PlainCredentials(username, password) + conn_parameters = pika.ConnectionParameters( + host=self._host, + port=self._port, + credentials=credentials, + ) + self._conn = pika.BlockingConnection(conn_parameters) + yield self._conn + + @contextmanager + def _get_channel(self) -> Iterator[BlockingChannel]: + if self._channel is None or self._channel.is_closed: + with self._get_amqp_conn() as connection: + self._channel = connection.channel() + yield self._channel + + def _wait(self) -> None: + time.sleep(0.5) + + @override + def fetch_stream(self) -> Generator[tuple[str, int], None, None]: + while True: + try: + method: Optional[Basic.Deliver] = None + with self._get_channel() as channel: + channel.queue_declare(queue=self._queue, durable=True) + properties: Optional[BasicProperties] + body: Optional[bytes] + + method, properties, body = channel.basic_get( + queue=self._queue, auto_ack=False + ) + + if method is None and properties is None and body is None: + if self._last_poll_time is None: + self._last_poll_time = datetime.now() + if ( + datetime.now() - self._last_poll_time + ).total_seconds() > self._polling_timeout: + break + self._wait() + continue + + self._last_poll_time = None + + yield body.decode(), cast(int, method.delivery_tag) + + except Exception as e: + logging.exception(e) + if method is not None: + self.reject(method.delivery_tag) + self._reset_conn() + + @override + def close(self) -> bool: + try: + if 
self._channel is not None: + self._channel.close() + if self._conn is not None: + self._conn.close() + return True + except Exception as e: + logging.exception(e) + return False diff --git a/consumer/src/adapters/file_parse_iot_records/csv.py b/consumer/src/adapters/file_parse_iot_records/csv.py index e38d0ca..eb2f4f8 100644 --- a/consumer/src/adapters/file_parse_iot_records/csv.py +++ b/consumer/src/adapters/file_parse_iot_records/csv.py @@ -8,6 +8,7 @@ from ...usecases import FileParseIOTRecordsClient import csv import logging +from pathlib import Path class CSVParseIOTRecordsClient(FileParseIOTRecordsClient): @@ -22,29 +23,40 @@ def __init__( self._file_extension = file_extension @overload - def parse(self, filename: str) -> list[IOTRecord]: + def parse(self, filename: str) -> Optional[list[IOTRecord]]: ... @overload - def parse(self, filename: Sequence[str]) -> list[list[IOTRecord]]: + def parse(self, filename: Sequence[str]) -> list[Optional[list[IOTRecord]]]: ... @override def parse( self, filename: str | Sequence[str] - ) -> list[IOTRecord] | list[list[IOTRecord]]: + ) -> Optional[list[IOTRecord]] | list[Optional[list[IOTRecord]]]: if isinstance(filename, str): return self._parse_single(filename) return self._parse_batch(filename) + def _basic_file_check(self, filename: str) -> bool: + if not Path(filename).exists(): + raise ValueError("File path must exist!") + if not Path(filename).is_file(): + raise ValueError("File path must be a file!") + if not filename.endswith(self._file_extension): + raise ValueError(f"File extension must be {self._file_extension}") + @override def parse_stream(self, filename: str) -> Iterator[IOTRecord]: try: - if not filename.endswith(self._file_extension): - raise ValueError(f"File extension must be {self._file_extension}") + self._basic_file_check(filename) with open(filename) as csvfile: reader = csv.reader(csvfile, delimiter=self._delimiter, strict=True) yield from self._parse_iter(reader) + except OSError as e: + logging.exception(e) + logging.error(f"Failed to read stream from {filename}!") + raise e except Exception as e: logging.error(f"Failed to parse {filename}") logging.exception(e) @@ -84,17 +96,16 @@ def _parse_iter(self, reader: Iterator[list[str]]) -> Iterator[IOTRecord]: ) return iot_records - def _parse_single(self, filename: str) -> list[IOTRecord]: + def _parse_single(self, filename: str) -> Optional[list[IOTRecord]]: try: - if not filename.endswith(self._file_extension): - raise ValueError(f"File extension must be {self._file_extension}") + self._basic_file_check(filename) with open(filename) as csvfile: reader = csv.reader(csvfile, delimiter=self._delimiter) return list(self._parse_iter(reader)) except Exception as e: - logging.error(f"Failed to parse {filename}") logging.exception(e) - return [] + logging.error(f"Failed to parse {filename}") + return None def _parse_batch(self, filenames: Sequence[str]) -> list[list[IOTRecord]]: with ThreadPoolExecutor() as executor: diff --git a/consumer/src/deployments/script/main.py b/consumer/src/deployments/script/main.py index 09d0022..57cd029 100644 --- a/consumer/src/deployments/script/main.py +++ b/consumer/src/deployments/script/main.py @@ -1,54 +1,85 @@ -from ...adapters.fetch_filenames.rabbitmq import RabbitMQFetchFilenamesClient +from ...adapters.fetch_filenames_stream.rabbitmq import ( + RabbitMQFetchFilenameStreamClient, +) from ...adapters.file_parse_iot_records.csv import CSVParseIOTRecordsClient from ...adapters.upsert_iot_records.postgres import PostgresUpsertIOTRecordsClient from 
.config import RabbitMQConfig, PostgresConfig, CSVParserConfig -from setup_logging import setup_logging -import logging +from .setup_logging import setup_logging from ...entities import IOTRecord +import logging setup_logging() -fetch_filenames_client = RabbitMQFetchFilenamesClient( - host=RabbitMQConfig.HOST, - port=RabbitMQConfig.PORT, - username=RabbitMQConfig.USERNAME, - password=RabbitMQConfig.PASSWORD, - queue=RabbitMQConfig.QUEUE, - polling_timeout=RabbitMQConfig.POLLING_TIMEOUT, -) +logging.getLogger("pika").setLevel(logging.ERROR) -file_parse_iot_records_client = CSVParseIOTRecordsClient( - recognized_datetime_formats=CSVParserConfig.RECOGNIZED_DATETIME_FORMATS, - delimiter=CSVParserConfig.DELIMITER, - file_extension=CSVParserConfig.FILE_EXTENSION, -) -upsert_iot_records_client = PostgresUpsertIOTRecordsClient( - host=PostgresConfig.HOST, - port=PostgresConfig.PORT, - credentials_service=lambda: (PostgresConfig.USERNAME, PostgresConfig.PASSWORD), - database=PostgresConfig.DATABASE, - batch_upsert_size=PostgresConfig.BATCH_UPSERT_SIZE, -) +def _upsert_iot_records_buffer( + iot_records_buffer: list[IOTRecord], + upsert_iot_records_client: PostgresUpsertIOTRecordsClient, +) -> None: + successes = upsert_iot_records_client.upsert(iot_records_buffer) + + if not all(successes): + raise Exception("Failed to upsert all records!") def main() -> None: - filestream_buffer: list[IOTRecord] = [] + fetch_filenames_stream_client = RabbitMQFetchFilenameStreamClient( + host=RabbitMQConfig.HOST, + port=RabbitMQConfig.PORT, + credentials_service=lambda: (RabbitMQConfig.USERNAME, RabbitMQConfig.PASSWORD), + queue=RabbitMQConfig.QUEUE, + polling_timeout=RabbitMQConfig.POLLING_TIMEOUT, + ) + + file_parse_iot_records_client = CSVParseIOTRecordsClient( + recognized_datetime_formats=CSVParserConfig.RECOGNIZED_DATETIME_FORMATS, + delimiter=CSVParserConfig.DELIMITER, + file_extension=CSVParserConfig.FILE_EXTENSION, + ) + + upsert_iot_records_client = PostgresUpsertIOTRecordsClient( + host=PostgresConfig.HOST, + port=PostgresConfig.PORT, + credentials_service=lambda: (PostgresConfig.USERNAME, PostgresConfig.PASSWORD), + database=PostgresConfig.DATABASE, + batch_upsert_size=PostgresConfig.BATCH_UPSERT_SIZE, + ) + try: - for filename in fetch_filenames_client.fetch(): - for iot_record in file_parse_iot_records_client.parse_stream(filename): - filestream_buffer.append(iot_record) - if len(filestream_buffer) >= PostgresConfig.BATCH_UPSERT_SIZE: - upsert_iot_records_client.upsert(filestream_buffer) - filestream_buffer.clear() - if filestream_buffer: - upsert_iot_records_client.upsert(filestream_buffer) - filestream_buffer.clear() + for filename, receipt in fetch_filenames_stream_client.fetch_stream(): + logging.info(f"Upserting {filename}...") + iot_records_buffer: list[IOTRecord] = [] + try: + for iot_record in file_parse_iot_records_client.parse_stream(filename): + iot_records_buffer.append(iot_record) + + if len(iot_records_buffer) < PostgresConfig.BATCH_UPSERT_SIZE: + continue + + _upsert_iot_records_buffer( + iot_records_buffer, upsert_iot_records_client + ) + iot_records_buffer.clear() + + if len(iot_records_buffer) > 0: + _upsert_iot_records_buffer( + iot_records_buffer, upsert_iot_records_client + ) + + logging.info(f"Successfully upserted {filename}!") + fetch_filenames_stream_client.ack(receipt) + except Exception as e: + logging.exception(e) + fetch_filenames_stream_client.reject(receipt) + logging.error(f"Failed to upsert {filename}!") + finally: + iot_records_buffer.clear() except Exception as e: 
logging.exception(e) raise e finally: - fetch_filenames_client.close() + fetch_filenames_stream_client.close() upsert_iot_records_client.close() diff --git a/consumer/src/deployments/script/setup_logging.py b/consumer/src/deployments/script/setup_logging.py index 161394c..dcae074 100644 --- a/consumer/src/deployments/script/setup_logging.py +++ b/consumer/src/deployments/script/setup_logging.py @@ -1,6 +1,6 @@ import logging from logging.handlers import TimedRotatingFileHandler -from config import LoggingConfig +from .config import LoggingConfig import pathlib diff --git a/consumer/src/usecases/__init__.py b/consumer/src/usecases/__init__.py index 4265028..1fc456a 100644 --- a/consumer/src/usecases/__init__.py +++ b/consumer/src/usecases/__init__.py @@ -1,3 +1,3 @@ -from .fetch_filenames import FetchFilenameClient +from .fetch_filenames_stream import FetchFilenameStreamClient from .file_parse_iot_records import FileParseIOTRecordsClient from .upsert_iot_records import UpsertIOTRecordsClient diff --git a/consumer/src/usecases/fetch_filenames.py b/consumer/src/usecases/fetch_filenames.py deleted file mode 100644 index c63f791..0000000 --- a/consumer/src/usecases/fetch_filenames.py +++ /dev/null @@ -1,12 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Generator - - -class FetchFilenameClient(ABC): - @abstractmethod - def fetch(self) -> Generator[str, None, None]: - ... - - @abstractmethod - def close(self) -> bool: - ... diff --git a/consumer/src/usecases/fetch_filenames_stream.py b/consumer/src/usecases/fetch_filenames_stream.py new file mode 100644 index 0000000..d96527c --- /dev/null +++ b/consumer/src/usecases/fetch_filenames_stream.py @@ -0,0 +1,38 @@ +from abc import ABC, abstractmethod +from typing import Generator, Sequence, overload, TypeVar, Generic + +T = TypeVar("T") + + +class FetchFilenameStreamClient(ABC, Generic[T]): + @overload + def ack(self, message_receipt: T) -> bool: + ... + + @overload + def ack(self, message_receipt: Sequence[T]) -> list[bool]: + ... + + @abstractmethod + def ack(self, message_receipt: T | Sequence[T]) -> bool | list[bool]: + ... + + @overload + def reject(self, message_receipt: T) -> bool: + ... + + @overload + def reject(self, message_receipt: Sequence[T]) -> list[bool]: + ... + + @abstractmethod + def reject(self, message_receipt: T | Sequence[T]) -> bool | list[bool]: + ... + + @abstractmethod + def fetch_stream(self) -> Generator[tuple[str, T], None, None]: + ... + + @abstractmethod + def close(self) -> bool: + ... diff --git a/consumer/src/usecases/file_parse_iot_records.py b/consumer/src/usecases/file_parse_iot_records.py index ca2276c..d64690c 100644 --- a/consumer/src/usecases/file_parse_iot_records.py +++ b/consumer/src/usecases/file_parse_iot_records.py @@ -1,21 +1,21 @@ from abc import ABC, abstractmethod -from typing import Iterator, overload, Sequence +from typing import Iterator, Optional, overload, Sequence from ..entities import IOTRecord class FileParseIOTRecordsClient(ABC): @overload - def parse(self, filename: str) -> list[IOTRecord]: + def parse(self, filename: str) -> Optional[list[IOTRecord]]: ... @overload - def parse(self, filename: Sequence[str]) -> list[list[IOTRecord]]: + def parse(self, filename: Sequence[str]) -> list[Optional[list[IOTRecord]]]: ... @abstractmethod def parse( self, filename: str | Sequence[str] - ) -> list[IOTRecord] | list[list[IOTRecord]]: + ) -> Optional[list[IOTRecord]] | list[Optional[list[IOTRecord]]]: ... 
@abstractmethod diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/conftest.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/conftest.py index 7ca45ac..0c38d86 100644 --- a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/conftest.py +++ b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/conftest.py @@ -1,13 +1,14 @@ -from src.adapters.fetch_filenames.rabbitmq import RabbitMQFetchFilenamesClient +from src.adapters.fetch_filenames_stream.rabbitmq import ( + RabbitMQFetchFilenameStreamClient, +) from src.deployments.script.config import RabbitMQConfig import pika import pytest -from pytest import MonkeyPatch @pytest.fixture(scope="function") -def rabbitmq_fetch_filenames_client() -> RabbitMQConfig: - return RabbitMQFetchFilenamesClient( +def rabbitmq_fetch_filenames_stream_client() -> RabbitMQConfig: + return RabbitMQFetchFilenameStreamClient( host=RabbitMQConfig.HOST, port=RabbitMQConfig.PORT, credentials_service=lambda: (RabbitMQConfig.USERNAME, RabbitMQConfig.PASSWORD), @@ -17,8 +18,8 @@ def rabbitmq_fetch_filenames_client() -> RabbitMQConfig: @pytest.fixture(scope="function") -def rabbitmq_fetch_filenames_no_wait_client() -> RabbitMQConfig: - return RabbitMQFetchFilenamesClient( +def rabbitmq_fetch_filenames_stream_no_wait_client() -> RabbitMQConfig: + return RabbitMQFetchFilenameStreamClient( host=RabbitMQConfig.HOST, port=RabbitMQConfig.PORT, credentials_service=lambda: (RabbitMQConfig.USERNAME, RabbitMQConfig.PASSWORD), @@ -48,7 +49,9 @@ def setup_teardown_rabbitmq_queue( pika_conn, queue = raw_rabbitmq_pika_conn_config channel = pika_conn.channel() + channel.queue_delete(queue=queue) channel.queue_declare(queue=queue, durable=True) channel.queue_purge(queue=queue) yield channel.queue_purge(queue=queue) + channel.queue_delete(queue=queue) diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_ack_failed.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_ack_failed.py new file mode 100644 index 0000000..03f6f49 --- /dev/null +++ b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_ack_failed.py @@ -0,0 +1,41 @@ +import pytest +from .utils import random_csv_filenames +from src.adapters.fetch_filenames_stream.rabbitmq import ( + RabbitMQFetchFilenameStreamClient, +) +import pika +from pytest import MonkeyPatch + + +@pytest.mark.parametrize("filename", random_csv_filenames()) +def test_fetch_single_ack_failed( + rabbitmq_fetch_filenames_stream_no_wait_client: RabbitMQFetchFilenameStreamClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + filename: str, + monkeypatch: MonkeyPatch, +): + conn, queue = raw_rabbitmq_pika_conn_config + + channel = conn.channel() + + channel.queue_declare(queue=queue, durable=True) + + channel.basic_publish( + exchange="", + routing_key=rabbitmq_fetch_filenames_stream_no_wait_client._queue, + body=filename, + properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), + ) + + for ( + fetched_filename, + receipt, + ) in rabbitmq_fetch_filenames_stream_no_wait_client.fetch_stream(): + assert fetched_filename == filename + + def mock_ack(self, *args, **kwargs): + raise Exception("Failed to ack!") + + monkeypatch.setattr(pika.channel.Channel, "basic_ack", mock_ack) + + assert not rabbitmq_fetch_filenames_stream_no_wait_client.ack(receipt) diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_ack_remove_data_in_stream.py 
b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_ack_remove_data_in_stream.py new file mode 100644 index 0000000..9ca8041 --- /dev/null +++ b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_ack_remove_data_in_stream.py @@ -0,0 +1,81 @@ +import pytest +from .utils import random_csv_filenames +from src.adapters.fetch_filenames_stream.rabbitmq import ( + RabbitMQFetchFilenameStreamClient, +) +import pika + + +@pytest.mark.smoke +@pytest.mark.parametrize("filename", random_csv_filenames()) +def test_fetch_single_ack_remove_data_in_stream( + rabbitmq_fetch_filenames_stream_no_wait_client: RabbitMQFetchFilenameStreamClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + filename: str, +): + conn, queue = raw_rabbitmq_pika_conn_config + + channel = conn.channel() + + channel.queue_declare(queue=queue, durable=True) + + channel.basic_publish( + exchange="", + routing_key=rabbitmq_fetch_filenames_stream_no_wait_client._queue, + body=filename, + properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), + ) + + for ( + fetched_filename, + receipt, + ) in rabbitmq_fetch_filenames_stream_no_wait_client.fetch_stream(): + assert fetched_filename == filename + + assert rabbitmq_fetch_filenames_stream_no_wait_client.ack(receipt) + + method_frame, _, body = channel.basic_get(queue=queue) + assert method_frame is None + assert body is None + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "filenames", + [random_csv_filenames() for _ in range(5)], +) +def test_fetch_batch_ack_remove_data_in_stream( + rabbitmq_fetch_filenames_stream_no_wait_client: RabbitMQFetchFilenameStreamClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + filenames: list[str], +): + conn, queue = raw_rabbitmq_pika_conn_config + + channel = conn.channel() + + channel.queue_declare(queue=queue, durable=True) + + for filename in filenames: + channel.basic_publish( + exchange="", + routing_key=queue, + body=filename, + properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), + ) + + all_filenames = [] + all_receipts = [] + for ( + filename, + receipt, + ) in rabbitmq_fetch_filenames_stream_no_wait_client.fetch_stream(): + all_filenames.append(filename) + all_receipts.append(receipt) + + assert all(rabbitmq_fetch_filenames_stream_no_wait_client.ack(all_receipts)) + + assert sorted(all_filenames) == sorted(filenames) + + method_frame, _, body = channel.basic_get(queue=queue) + assert method_frame is None + assert body is None diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_close_conn_failed.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_close_conn_failed.py index ee46fa5..d36b105 100644 --- a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_close_conn_failed.py +++ b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_close_conn_failed.py @@ -1,11 +1,13 @@ from pytest import MonkeyPatch, LogCaptureFixture import pika -from src.adapters.fetch_filenames.rabbitmq import RabbitMQFetchFilenamesClient +from src.adapters.fetch_filenames_stream.rabbitmq import ( + RabbitMQFetchFilenameStreamClient, +) from .utils import random_csv_filenames def test_close_conn_failed( - rabbitmq_fetch_filenames_no_wait_client: RabbitMQFetchFilenamesClient, + rabbitmq_fetch_filenames_stream_no_wait_client: RabbitMQFetchFilenameStreamClient, raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], monkeypatch: MonkeyPatch, caplog: LogCaptureFixture, @@ -15,20 +17,24 @@ 
def test_close_conn_failed( channel = conn.channel() channel.queue_declare( - queue=rabbitmq_fetch_filenames_no_wait_client._queue, durable=True + queue=rabbitmq_fetch_filenames_stream_no_wait_client._queue, durable=True ) channel.basic_publish( exchange="", - routing_key=rabbitmq_fetch_filenames_no_wait_client._queue, + routing_key=rabbitmq_fetch_filenames_stream_no_wait_client._queue, body=random_csv_filenames()[0], properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), ) - for filename in rabbitmq_fetch_filenames_no_wait_client.fetch(): + for ( + filename, + receipt, + ) in rabbitmq_fetch_filenames_stream_no_wait_client.fetch_stream(): assert filename is not None + assert rabbitmq_fetch_filenames_stream_no_wait_client.ack(receipt) - assert rabbitmq_fetch_filenames_no_wait_client._conn is not None + assert rabbitmq_fetch_filenames_stream_no_wait_client._conn is not None def mock_failed_close( self, @@ -40,5 +46,5 @@ def mock_failed_close( monkeypatch.setattr(pika.BlockingConnection, "close", mock_failed_close) with caplog.at_level("ERROR"): - assert not rabbitmq_fetch_filenames_no_wait_client.close() + assert not rabbitmq_fetch_filenames_stream_no_wait_client.close() assert "Failed to close!" in caplog.text diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_close_conn_successful.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_close_conn_successful.py index 756d329..ebc7583 100644 --- a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_close_conn_successful.py +++ b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_close_conn_successful.py @@ -1,16 +1,18 @@ -from src.adapters.fetch_filenames.rabbitmq import RabbitMQFetchFilenamesClient +from src.adapters.fetch_filenames_stream.rabbitmq import ( + RabbitMQFetchFilenameStreamClient, +) def test_close_conn_successful( - rabbitmq_fetch_filenames_no_wait_client: RabbitMQFetchFilenamesClient, + rabbitmq_fetch_filenames_stream_no_wait_client: RabbitMQFetchFilenameStreamClient, ): - for _ in rabbitmq_fetch_filenames_no_wait_client.fetch(): + for _ in rabbitmq_fetch_filenames_stream_no_wait_client.fetch_stream(): pass - assert rabbitmq_fetch_filenames_no_wait_client._conn is not None - assert rabbitmq_fetch_filenames_no_wait_client.close() + assert rabbitmq_fetch_filenames_stream_no_wait_client._conn is not None + assert rabbitmq_fetch_filenames_stream_no_wait_client.close() def test_none_conn_close_successful( - rabbitmq_fetch_filenames_client: RabbitMQFetchFilenamesClient, + rabbitmq_fetch_filenames_stream_client: RabbitMQFetchFilenameStreamClient, ): - assert rabbitmq_fetch_filenames_client.close() + assert rabbitmq_fetch_filenames_stream_client.close() diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_failed_conn.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_failed_conn.py index e4d9e2c..ec30bb6 100644 --- a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_failed_conn.py +++ b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_failed_conn.py @@ -1,6 +1,8 @@ import pytest from .utils import random_csv_filenames -from src.adapters.fetch_filenames.rabbitmq import RabbitMQFetchFilenamesClient +from src.adapters.fetch_filenames_stream.rabbitmq import ( + RabbitMQFetchFilenameStreamClient, +) from src.deployments.script.config import RabbitMQConfig import pika from pytest import MonkeyPatch @@ -8,7 +10,7 @@ @pytest.mark.smoke def 
test_fetch_failed_conn( - rabbitmq_fetch_filenames_client: RabbitMQFetchFilenamesClient, + rabbitmq_fetch_filenames_stream_client: RabbitMQFetchFilenameStreamClient, monkeypatch: MonkeyPatch, ): def mocked_failed_conn( @@ -20,10 +22,12 @@ def mocked_failed_conn( monkeypatch.setattr(pika.BlockingConnection, "__init__", mocked_failed_conn) - monkeypatch.setattr(RabbitMQFetchFilenamesClient, "_reset_conn", mocked_failed_conn) + monkeypatch.setattr( + RabbitMQFetchFilenameStreamClient, "_reset_conn", mocked_failed_conn + ) with pytest.raises(Exception, match="^Failed to connect$"): - next(rabbitmq_fetch_filenames_client.fetch()) + next(rabbitmq_fetch_filenames_stream_client.fetch_stream()) monkeypatch.undo() monkeypatch.undo() @@ -33,7 +37,7 @@ def mocked_failed_conn( def test_fetch_wrong_credentials( monkeypatch: MonkeyPatch, ): - rabbitmq_fetch_filenames_client = RabbitMQFetchFilenamesClient( + rabbitmq_fetch_filenames_stream_client = RabbitMQFetchFilenameStreamClient( host=RabbitMQConfig.HOST, port=RabbitMQConfig.PORT, credentials_service=lambda: ("wrong", "wrong"), @@ -48,10 +52,12 @@ def mocked_failed_conn( ) -> None: raise Exception("Failed to connect") - monkeypatch.setattr(RabbitMQFetchFilenamesClient, "_reset_conn", mocked_failed_conn) + monkeypatch.setattr( + RabbitMQFetchFilenameStreamClient, "_reset_conn", mocked_failed_conn + ) with pytest.raises(Exception, match="^Failed to connect$"): - next(rabbitmq_fetch_filenames_client.fetch()) + next(rabbitmq_fetch_filenames_stream_client.fetch_stream()) monkeypatch.undo() @@ -61,7 +67,7 @@ def mocked_failed_conn( def test_publish_single_wrong_host( monkeypatch: MonkeyPatch, ): - rabbitmq_fetch_filenames_client = RabbitMQFetchFilenamesClient( + rabbitmq_fetch_filenames_stream_client = RabbitMQFetchFilenameStreamClient( host="wrong", port=RabbitMQConfig.PORT, credentials_service=lambda: (RabbitMQConfig.USERNAME, RabbitMQConfig.PASSWORD), @@ -76,17 +82,19 @@ def mocked_failed_conn( ) -> None: raise Exception("Failed to connect") - monkeypatch.setattr(RabbitMQFetchFilenamesClient, "_reset_conn", mocked_failed_conn) + monkeypatch.setattr( + RabbitMQFetchFilenameStreamClient, "_reset_conn", mocked_failed_conn + ) with pytest.raises(Exception, match="^Failed to connect$") as e: - next(rabbitmq_fetch_filenames_client.fetch()) + next(rabbitmq_fetch_filenames_stream_client.fetch_stream()) monkeypatch.undo() @pytest.mark.slow def test_fetch_failed_conn_reset_conn( - rabbitmq_fetch_filenames_no_wait_client: RabbitMQFetchFilenamesClient, + rabbitmq_fetch_filenames_stream_no_wait_client: RabbitMQFetchFilenameStreamClient, raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], monkeypatch: MonkeyPatch, ): @@ -101,20 +109,23 @@ def test_fetch_failed_conn_reset_conn( channel.basic_publish( exchange="", - routing_key=rabbitmq_fetch_filenames_no_wait_client._queue, + routing_key=rabbitmq_fetch_filenames_stream_no_wait_client._queue, body=first_published_filename, properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), ) - for i, filename in enumerate(rabbitmq_fetch_filenames_no_wait_client.fetch()): + for i, (filename, receipt) in enumerate( + rabbitmq_fetch_filenames_stream_no_wait_client.fetch_stream() + ): if i == 0: - assert rabbitmq_fetch_filenames_no_wait_client._conn is not None - conn = rabbitmq_fetch_filenames_no_wait_client._conn + assert rabbitmq_fetch_filenames_stream_no_wait_client._conn is not None + conn = rabbitmq_fetch_filenames_stream_no_wait_client._conn assert filename == first_published_filename + assert 
rabbitmq_fetch_filenames_stream_no_wait_client.ack(receipt) channel.basic_publish( exchange="", - routing_key=rabbitmq_fetch_filenames_no_wait_client._queue, + routing_key=rabbitmq_fetch_filenames_stream_no_wait_client._queue, body=second_published_filename, properties=pika.BasicProperties( delivery_mode=pika.DeliveryMode.Persistent @@ -138,5 +149,6 @@ def mock_failed_fetch( monkeypatch.setattr(pika.channel.Channel, "basic_get", mock_failed_fetch) if i == 1: assert filename == second_published_filename - assert rabbitmq_fetch_filenames_no_wait_client._conn is not None - assert rabbitmq_fetch_filenames_no_wait_client._conn != conn + assert rabbitmq_fetch_filenames_stream_no_wait_client.ack(receipt) + assert rabbitmq_fetch_filenames_stream_no_wait_client._conn is not None + assert rabbitmq_fetch_filenames_stream_no_wait_client._conn != conn diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_failed_fetch.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_failed_fetch.py index 9cddf90..6658b1b 100644 --- a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_failed_fetch.py +++ b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_failed_fetch.py @@ -1,6 +1,8 @@ import pytest from .utils import random_csv_filenames -from src.adapters.fetch_filenames.rabbitmq import RabbitMQFetchFilenamesClient +from src.adapters.fetch_filenames_stream.rabbitmq import ( + RabbitMQFetchFilenameStreamClient, +) import pika import pytest from pytest import LogCaptureFixture, MonkeyPatch @@ -9,7 +11,7 @@ @pytest.mark.smoke @pytest.mark.parametrize("filename", random_csv_filenames()) def test_fetch_single_exception_resilience( - rabbitmq_fetch_filenames_no_wait_client: RabbitMQFetchFilenamesClient, + rabbitmq_fetch_filenames_stream_no_wait_client: RabbitMQFetchFilenameStreamClient, raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], filename: str, monkeypatch: MonkeyPatch, @@ -39,8 +41,12 @@ def mock_failed_fetch( monkeypatch.setattr(pika.channel.Channel, "basic_get", mock_failed_fetch) with caplog.at_level("ERROR"): - for fetched_filename in rabbitmq_fetch_filenames_no_wait_client.fetch(): + for ( + fetched_filename, + receipt, + ) in rabbitmq_fetch_filenames_stream_no_wait_client.fetch_stream(): assert fetched_filename == filename + assert rabbitmq_fetch_filenames_stream_no_wait_client.ack(receipt) assert "Failed to fetch!" 
in caplog.text @@ -50,7 +56,7 @@ def mock_failed_fetch( [random_csv_filenames() for _ in range(5)], ) def test_fetch_batch_exception_resilience( - rabbitmq_fetch_filenames_no_wait_client: RabbitMQFetchFilenamesClient, + rabbitmq_fetch_filenames_stream_no_wait_client: RabbitMQFetchFilenameStreamClient, raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], filenames: list[str], monkeypatch: MonkeyPatch, @@ -67,7 +73,7 @@ def test_fetch_batch_exception_resilience( for filename in filenames: channel.basic_publish( exchange="", - routing_key=rabbitmq_fetch_filenames_no_wait_client._queue, + routing_key=rabbitmq_fetch_filenames_stream_no_wait_client._queue, body=filename, properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), ) @@ -91,8 +97,12 @@ def mock_failed_fetch( all_filenames = [] with caplog.at_level("ERROR"): - for fetched_filename in rabbitmq_fetch_filenames_no_wait_client.fetch(): + for ( + fetched_filename, + receipt, + ) in rabbitmq_fetch_filenames_stream_no_wait_client.fetch_stream(): all_filenames.append(fetched_filename) + assert rabbitmq_fetch_filenames_stream_no_wait_client.ack(receipt) assert "Failed to fetch!" in caplog.text assert sorted(all_filenames) == sorted(filenames) @@ -100,16 +110,16 @@ def mock_failed_fetch( @pytest.mark.parametrize("filename", random_csv_filenames()) def test_fetch_single_ack_exception_resilience( - rabbitmq_fetch_filenames_client: RabbitMQFetchFilenamesClient, + rabbitmq_fetch_filenames_stream_client: RabbitMQFetchFilenameStreamClient, raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], filename: str, monkeypatch: MonkeyPatch, ): - new_rabbitmq_fetch_filenames_client = RabbitMQFetchFilenamesClient( - host=rabbitmq_fetch_filenames_client._host, - port=rabbitmq_fetch_filenames_client._port, - credentials_service=rabbitmq_fetch_filenames_client._credentials_service, - queue=rabbitmq_fetch_filenames_client._queue, + new_rabbitmq_fetch_filenames_stream_client = RabbitMQFetchFilenameStreamClient( + host=rabbitmq_fetch_filenames_stream_client._host, + port=rabbitmq_fetch_filenames_stream_client._port, + credentials_service=rabbitmq_fetch_filenames_stream_client._credentials_service, + queue=rabbitmq_fetch_filenames_stream_client._queue, polling_timeout=1, ) @@ -123,7 +133,7 @@ def test_fetch_single_ack_exception_resilience( channel.basic_publish( exchange="", - routing_key=rabbitmq_fetch_filenames_client._queue, + routing_key=rabbitmq_fetch_filenames_stream_client._queue, body=filename, properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), ) @@ -144,5 +154,10 @@ def mock_failed_ack( monkeypatch.setattr(pika.channel.Channel, "basic_ack", mock_failed_ack) - for fetched_filename in new_rabbitmq_fetch_filenames_client.fetch(): + for ( + fetched_filename, + receipt, + ) in new_rabbitmq_fetch_filenames_stream_client.fetch_stream(): + monkeypatch.undo() assert fetched_filename == filename + assert new_rabbitmq_fetch_filenames_stream_client.ack(receipt) diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_poll_until_timeout.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_poll_until_timeout.py index 6039dab..7972df3 100644 --- a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_poll_until_timeout.py +++ b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_poll_until_timeout.py @@ -1,7 +1,9 @@ import time import pytest from .utils import random_csv_filenames -from src.adapters.fetch_filenames.rabbitmq import 
RabbitMQFetchFilenamesClient +from src.adapters.fetch_filenames_stream.rabbitmq import ( + RabbitMQFetchFilenameStreamClient, +) import pika import pytest @@ -9,15 +11,15 @@ @pytest.mark.smoke @pytest.mark.parametrize("timeout", [0.5 * i for i in range(1, 5)]) def test_fetch_none_wait_timeout( - rabbitmq_fetch_filenames_client: RabbitMQFetchFilenamesClient, + rabbitmq_fetch_filenames_stream_client: RabbitMQFetchFilenameStreamClient, raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], timeout: int, ): - new_rabbitmq_fetch_filenames_client = RabbitMQFetchFilenamesClient( - host=rabbitmq_fetch_filenames_client._host, - port=rabbitmq_fetch_filenames_client._port, - credentials_service=rabbitmq_fetch_filenames_client._credentials_service, - queue=rabbitmq_fetch_filenames_client._queue, + new_rabbitmq_fetch_filenames_stream_client = RabbitMQFetchFilenameStreamClient( + host=rabbitmq_fetch_filenames_stream_client._host, + port=rabbitmq_fetch_filenames_stream_client._port, + credentials_service=rabbitmq_fetch_filenames_stream_client._credentials_service, + queue=rabbitmq_fetch_filenames_stream_client._queue, polling_timeout=timeout, ) @@ -38,8 +40,12 @@ def test_fetch_none_wait_timeout( start_time = time.perf_counter() - for fetched_filename in new_rabbitmq_fetch_filenames_client.fetch(): + for ( + fetched_filename, + receipt, + ) in new_rabbitmq_fetch_filenames_stream_client.fetch_stream(): assert fetched_filename == filename + assert new_rabbitmq_fetch_filenames_stream_client.ack(receipt) end_time = time.perf_counter() diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_reject_failed.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_reject_failed.py new file mode 100644 index 0000000..ab28578 --- /dev/null +++ b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_reject_failed.py @@ -0,0 +1,41 @@ +import pytest +from .utils import random_csv_filenames +from src.adapters.fetch_filenames_stream.rabbitmq import ( + RabbitMQFetchFilenameStreamClient, +) +import pika +from pytest import MonkeyPatch + + +@pytest.mark.parametrize("filename", random_csv_filenames()) +def test_fetch_single_reject_retain_data_in_stream( + rabbitmq_fetch_filenames_stream_no_wait_client: RabbitMQFetchFilenameStreamClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + filename: str, + monkeypatch: MonkeyPatch, +): + conn, queue = raw_rabbitmq_pika_conn_config + + channel = conn.channel() + + channel.queue_declare(queue=queue, durable=True) + + channel.basic_publish( + exchange="", + routing_key=rabbitmq_fetch_filenames_stream_no_wait_client._queue, + body=filename, + properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), + ) + + for ( + fetched_filename, + receipt, + ) in rabbitmq_fetch_filenames_stream_no_wait_client.fetch_stream(): + assert fetched_filename == filename + + def mock_ack(self, *args, **kwargs): + raise Exception("Failed to reject!") + + monkeypatch.setattr(pika.channel.Channel, "basic_nack", mock_ack) + + assert not rabbitmq_fetch_filenames_stream_no_wait_client.reject(receipt) diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_reject_retain_data_in_stream.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_reject_retain_data_in_stream.py new file mode 100644 index 0000000..716f4d9 --- /dev/null +++ b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_reject_retain_data_in_stream.py @@ -0,0 +1,94 @@ +import pytest 
+from .utils import random_csv_filenames +from src.adapters.fetch_filenames_stream.rabbitmq import ( + RabbitMQFetchFilenameStreamClient, +) +import pika +import pytest + + +@pytest.mark.smoke +@pytest.mark.parametrize("filename", random_csv_filenames()) +def test_fetch_single_reject_retain_data_in_stream( + rabbitmq_fetch_filenames_stream_no_wait_client: RabbitMQFetchFilenameStreamClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + filename: str, +): + conn, queue = raw_rabbitmq_pika_conn_config + + channel = conn.channel() + + channel.queue_declare(queue=queue, durable=True) + + channel.basic_publish( + exchange="", + routing_key=rabbitmq_fetch_filenames_stream_no_wait_client._queue, + body=filename, + properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), + ) + + for ( + fetched_filename, + receipt, + ) in rabbitmq_fetch_filenames_stream_no_wait_client.fetch_stream(): + assert fetched_filename == filename + + assert rabbitmq_fetch_filenames_stream_no_wait_client.reject(receipt) + + for ( + fetched_filename, + receipt, + ) in rabbitmq_fetch_filenames_stream_no_wait_client.fetch_stream(): + assert fetched_filename == filename + assert rabbitmq_fetch_filenames_stream_no_wait_client.ack(receipt) + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "filenames", + [random_csv_filenames() for _ in range(5)], +) +def test_fetch_batch_reject_retain_data_in_stream( + rabbitmq_fetch_filenames_stream_no_wait_client: RabbitMQFetchFilenameStreamClient, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + filenames: list[str], +): + conn, queue = raw_rabbitmq_pika_conn_config + + channel = conn.channel() + + channel.queue_declare(queue=queue, durable=True) + + for filename in filenames: + channel.basic_publish( + exchange="", + routing_key=queue, + body=filename, + properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), + ) + + all_filenames = [] + all_receipts = [] + for ( + filename, + receipt, + ) in rabbitmq_fetch_filenames_stream_no_wait_client.fetch_stream(): + all_filenames.append(filename) + all_receipts.append(receipt) + + assert sorted(all_filenames) == sorted(filenames) + + assert all(rabbitmq_fetch_filenames_stream_no_wait_client.reject(all_receipts)) + + new_all_filenames = [] + new_all_receipts = [] + for ( + filename, + receipt, + ) in rabbitmq_fetch_filenames_stream_no_wait_client.fetch_stream(): + new_all_filenames.append(filename) + new_all_receipts.append(receipt) + + assert sorted(new_all_filenames) == sorted(all_filenames) + + assert all(rabbitmq_fetch_filenames_stream_no_wait_client.ack(new_all_receipts)) diff --git a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_successful_fetch.py b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_successful_fetch.py index 8fdeb4c..4b880ce 100644 --- a/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_successful_fetch.py +++ b/consumer/tests/test_adapters/test_fetch_filenames/test_rabbitmq/test_successful_fetch.py @@ -1,6 +1,8 @@ import pytest from .utils import random_csv_filenames -from src.adapters.fetch_filenames.rabbitmq import RabbitMQFetchFilenamesClient +from src.adapters.fetch_filenames_stream.rabbitmq import ( + RabbitMQFetchFilenameStreamClient, +) import pika import pytest @@ -8,7 +10,7 @@ @pytest.mark.smoke @pytest.mark.parametrize("filename", random_csv_filenames()) def test_fetch_single_success( - rabbitmq_fetch_filenames_no_wait_client: RabbitMQFetchFilenamesClient, + 
rabbitmq_fetch_filenames_stream_no_wait_client: RabbitMQFetchFilenameStreamClient, raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], filename: str, ): @@ -20,13 +22,17 @@ def test_fetch_single_success( channel.basic_publish( exchange="", - routing_key=rabbitmq_fetch_filenames_no_wait_client._queue, + routing_key=rabbitmq_fetch_filenames_stream_no_wait_client._queue, body=filename, properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), ) - for fetched_filename in rabbitmq_fetch_filenames_no_wait_client.fetch(): + for ( + fetched_filename, + receipt, + ) in rabbitmq_fetch_filenames_stream_no_wait_client.fetch_stream(): assert fetched_filename == filename + assert rabbitmq_fetch_filenames_stream_no_wait_client.ack(receipt) @pytest.mark.smoke @@ -34,8 +40,8 @@ def test_fetch_single_success( "filenames", [random_csv_filenames() for _ in range(5)], ) -def test_publish_batch_success( - rabbitmq_fetch_filenames_no_wait_client: RabbitMQFetchFilenamesClient, +def test_fetch_batch_success( + rabbitmq_fetch_filenames_stream_no_wait_client: RabbitMQFetchFilenameStreamClient, raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], filenames: list[str], ): @@ -54,7 +60,11 @@ def test_publish_batch_success( ) all_filenames = [] - for filename in rabbitmq_fetch_filenames_no_wait_client.fetch(): + for ( + filename, + receipt, + ) in rabbitmq_fetch_filenames_stream_no_wait_client.fetch_stream(): all_filenames.append(filename) + assert rabbitmq_fetch_filenames_stream_no_wait_client.ack(receipt) assert sorted(all_filenames) == sorted(filenames) diff --git a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/conftest.py b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/conftest.py index 663ddf9..c964d6c 100644 --- a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/conftest.py +++ b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/conftest.py @@ -84,4 +84,5 @@ def csv_parse_iot_records_client() -> CSVParseIOTRecordsClient: return CSVParseIOTRecordsClient( recognized_datetime_formats=CSVParserConfig.RECOGNIZED_DATETIME_FORMATS, delimiter=CSVParserConfig.DELIMITER, + file_extension=CSVParserConfig.FILE_EXTENSION, ) diff --git a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_file_error_return_none.py b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_file_error_return_none.py new file mode 100644 index 0000000..0cc315c --- /dev/null +++ b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_file_error_return_none.py @@ -0,0 +1,68 @@ +import pytest +from src.adapters.file_parse_iot_records.csv import CSVParseIOTRecordsClient +from pytest import FixtureRequest, MonkeyPatch, LogCaptureFixture + + +@pytest.mark.parametrize( + "fixture_name", + [ + "random_valid_csv_file", + "random_invalid_datetime_and_value_csv_file", + "random_invalid_datetime_csv_file", + "random_invalid_value_csv_file", + ] + * 5, +) +def test_parse_single_failed_open_file_return_none( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_name: str, + request: FixtureRequest, + caplog: LogCaptureFixture, + monkeypatch: MonkeyPatch, +): + random_csv_file: str = request.getfixturevalue(fixture_name) + + def mock_open(*args, **kwargs): + raise OSError("Failed to open file!") + + monkeypatch.setattr("builtins.open", mock_open) + + with caplog.at_level("ERROR"): + assert csv_parse_iot_records_client.parse(random_csv_file) is None + assert "Failed 
to open file!" in caplog.text + + +@pytest.mark.parametrize( + "fixture_names", + [ + tuple( + [ + "random_valid_csv_file", + "random_invalid_datetime_and_value_csv_file", + "random_invalid_datetime_csv_file", + "random_invalid_value_csv_file", + ] + ) + for _ in range(5) + ], +) +def test_parse_batch_failed_open_file_return_none( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_names: tuple[str, ...], + request: FixtureRequest, + caplog: LogCaptureFixture, + monkeypatch: MonkeyPatch, +): + random_csv_files: list[str] = [ + request.getfixturevalue(fixture_name) for fixture_name in fixture_names + ] + + def mock_open(*args, **kwargs): + raise OSError("Failed to open file!") + + monkeypatch.setattr("builtins.open", mock_open) + + with caplog.at_level("ERROR"): + for parsed_record in csv_parse_iot_records_client.parse(random_csv_files): + assert parsed_record is None + assert "Failed to open file!" in caplog.text diff --git a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_file_not_exists.py b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_file_not_exists.py new file mode 100644 index 0000000..6a672b0 --- /dev/null +++ b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_file_not_exists.py @@ -0,0 +1,82 @@ +import pytest +from src.adapters.file_parse_iot_records.csv import CSVParseIOTRecordsClient +from pytest import FixtureRequest, LogCaptureFixture +from src.entities import IOTRecord + + +@pytest.mark.parametrize( + "fixture_name", + [ + "random_valid_csv_file", + ] + * 5, +) +def test_parse_single_file_not_exists_failed( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_name: str, + request: FixtureRequest, + caplog: LogCaptureFixture, +): + random_file: str = request.getfixturevalue(fixture_name) + random_file = random_file.replace(".csv", "") + + with caplog.at_level("ERROR"): + iot_records = csv_parse_iot_records_client.parse(random_file) + assert iot_records is None + assert f"Failed to parse {random_file}" in caplog.text + assert "File path must exist!" in caplog.text + + +@pytest.mark.parametrize( + "fixture_name", + ["random_valid_csv_file"] * 5, +) +def test_parse_stream_file_not_exists_failed( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_name: str, + request: FixtureRequest, + caplog: LogCaptureFixture, +): + random_file: str = request.getfixturevalue(fixture_name) + random_file = random_file.replace(".csv", "") + + all_iot_records: list[IOTRecord] = [] + with caplog.at_level("ERROR"): + for iot_record in csv_parse_iot_records_client.parse_stream(random_file): + assert isinstance(iot_record, IOTRecord) + all_iot_records.append(iot_record) + assert len(all_iot_records) == 0 + assert f"Failed to parse {random_file}" in caplog.text + assert "File path must exist!" 
in caplog.text + + +@pytest.mark.parametrize( + "fixture_names", + [ + tuple( + [ + "random_valid_csv_file", + ] + * 5 + ) + for _ in range(5) + ], +) +def test_parse_batch_file_not_exists_failed( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_names: tuple[str, ...], + request: FixtureRequest, + caplog: LogCaptureFixture, +): + random_files: list[str] = [ + request.getfixturevalue(fixture_name) for fixture_name in fixture_names + ] + + random_files = [random_file.replace(".csv", "") for random_file in random_files] + + with caplog.at_level("ERROR"): + iot_records = csv_parse_iot_records_client.parse(random_files) + for random_file, iot_record in zip(random_files, iot_records): + assert iot_record is None + assert f"Failed to parse {random_file}" in caplog.text + assert "File path must exist!" in caplog.text diff --git a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_file_stream_error_raise.py b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_file_stream_error_raise.py new file mode 100644 index 0000000..34aec8b --- /dev/null +++ b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_file_stream_error_raise.py @@ -0,0 +1,35 @@ +import pytest +from src.adapters.file_parse_iot_records.csv import CSVParseIOTRecordsClient +from pytest import FixtureRequest, MonkeyPatch, LogCaptureFixture + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "fixture_name", + [ + "random_valid_csv_file", + "random_invalid_datetime_and_value_csv_file", + "random_invalid_datetime_csv_file", + "random_invalid_value_csv_file", + ] + * 5, +) +def test_parse_stream_failed_open_file_raise( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + fixture_name: str, + request: FixtureRequest, + caplog: LogCaptureFixture, + monkeypatch: MonkeyPatch, +): + random_csv_file: str = request.getfixturevalue(fixture_name) + + def mock_open(*args, **kwargs): + raise OSError("Failed to open file!") + + monkeypatch.setattr("builtins.open", mock_open) + + with caplog.at_level("ERROR"): + with pytest.raises(Exception, match="^Failed to open file!$"): + for _ in csv_parse_iot_records_client.parse_stream(random_csv_file): + pass + assert "Failed to open file!" 
in caplog.text diff --git a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_open_file.py b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_open_file.py deleted file mode 100644 index ee1a637..0000000 --- a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_open_file.py +++ /dev/null @@ -1,110 +0,0 @@ -import pytest -from src.adapters.file_parse_iot_records.csv import CSVParseIOTRecordsClient -from pytest import FixtureRequest, MonkeyPatch, LogCaptureFixture -from src.entities import IOTRecord - - -@pytest.mark.smoke -@pytest.mark.parametrize( - "fixture_name", - [ - "random_valid_csv_file", - "random_invalid_datetime_and_value_csv_file", - "random_invalid_datetime_csv_file", - "random_invalid_value_csv_file", - ] - * 5, -) -def test_parse_single_failed_open_file( - csv_parse_iot_records_client: CSVParseIOTRecordsClient, - fixture_name: str, - request: FixtureRequest, - caplog: LogCaptureFixture, - monkeypatch: MonkeyPatch, -): - random_csv_file: str = request.getfixturevalue(fixture_name) - - def mock_open(*args, **kwargs): - raise FileNotFoundError("Failed to open file!") - - monkeypatch.setattr("builtins.open", mock_open) - - with caplog.at_level("ERROR"): - iot_records = csv_parse_iot_records_client.parse(random_csv_file) - assert len(iot_records) == 0 - assert f"Failed to parse {random_csv_file}" in caplog.text - assert "Failed to open file!" in caplog.text - - -@pytest.mark.smoke -@pytest.mark.parametrize( - "fixture_name", - [ - "random_valid_csv_file", - "random_invalid_datetime_and_value_csv_file", - "random_invalid_datetime_csv_file", - "random_invalid_value_csv_file", - ] - * 5, -) -def test_parse_stream_failed_open_file( - csv_parse_iot_records_client: CSVParseIOTRecordsClient, - fixture_name: str, - request: FixtureRequest, - caplog: LogCaptureFixture, - monkeypatch: MonkeyPatch, -): - random_csv_file: str = request.getfixturevalue(fixture_name) - - def mock_open(*args, **kwargs): - raise FileNotFoundError("Failed to open file!") - - monkeypatch.setattr("builtins.open", mock_open) - - all_iot_records: list[IOTRecord] = [] - with caplog.at_level("ERROR"): - for iot_record in csv_parse_iot_records_client.parse_stream(random_csv_file): - assert isinstance(iot_record, IOTRecord) - all_iot_records.append(iot_record) - assert len(all_iot_records) == 0 - assert f"Failed to parse {random_csv_file}" in caplog.text - assert "Failed to open file!" in caplog.text - - -@pytest.mark.smoke -@pytest.mark.parametrize( - "fixture_names", - [ - tuple( - [ - "random_valid_csv_file", - "random_invalid_datetime_and_value_csv_file", - "random_invalid_datetime_csv_file", - "random_invalid_value_csv_file", - ] - ) - for _ in range(5) - ], -) -def test_parse_batch_failed_open_file( - csv_parse_iot_records_client: CSVParseIOTRecordsClient, - fixture_names: tuple[str, ...], - request: FixtureRequest, - caplog: LogCaptureFixture, - monkeypatch: MonkeyPatch, -): - random_csv_files: list[str] = [ - request.getfixturevalue(fixture_name) for fixture_name in fixture_names - ] - - def mock_open(*args, **kwargs): - raise FileNotFoundError("Failed to open file!") - - monkeypatch.setattr("builtins.open", mock_open) - - with caplog.at_level("ERROR"): - iot_records = csv_parse_iot_records_client.parse(random_csv_files) - for random_csv_file, iot_record in zip(random_csv_files, iot_records): - assert len(iot_record) == 0 - assert f"Failed to parse {random_csv_file}" in caplog.text - assert "Failed to open file!" 
in caplog.text diff --git a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_other_file_formats.py b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_other_file_formats.py index 9f1188d..5b32c3b 100644 --- a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_other_file_formats.py +++ b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_other_file_formats.py @@ -29,7 +29,7 @@ def test_parse_single_other_format_failed( with caplog.at_level("ERROR"): iot_records = csv_parse_iot_records_client.parse(random_file) - assert len(iot_records) == 0 + assert iot_records is None assert f"Failed to parse {random_file}" in caplog.text assert ( f"File extension must be {csv_parse_iot_records_client._file_extension}" @@ -105,7 +105,7 @@ def test_parse_batch_other_format_failed( with caplog.at_level("ERROR"): iot_records = csv_parse_iot_records_client.parse(random_files) for random_file, iot_record in zip(random_files, iot_records): - assert len(iot_record) == 0 + assert iot_record is None assert f"Failed to parse {random_file}" in caplog.text assert ( f"File extension must be {csv_parse_iot_records_client._file_extension}" diff --git a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_parse_dir.py b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_parse_dir.py new file mode 100644 index 0000000..f37141d --- /dev/null +++ b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_parse_dir.py @@ -0,0 +1,53 @@ +import pytest +from src.adapters.file_parse_iot_records.csv import CSVParseIOTRecordsClient +from pytest import FixtureRequest, LogCaptureFixture +from src.entities import IOTRecord +from pathlib import Path + + +def test_parse_single_dir_failed( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + caplog: LogCaptureFixture, + tmp_path: Path, +): + with caplog.at_level("ERROR"): + iot_records = csv_parse_iot_records_client.parse(str(tmp_path)) + assert iot_records is None + assert f"Failed to parse {tmp_path}" in caplog.text + assert "File path must be a file!" in caplog.text + + +def test_parse_stream_dir_failed( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + caplog: LogCaptureFixture, + tmp_path: Path, +): + all_iot_records: list[IOTRecord] = [] + with caplog.at_level("ERROR"): + for iot_record in csv_parse_iot_records_client.parse_stream(str(tmp_path)): + assert isinstance(iot_record, IOTRecord) + all_iot_records.append(iot_record) + assert len(all_iot_records) == 0 + assert f"Failed to parse {tmp_path}" in caplog.text + assert "File path must be a file!" in caplog.text + + +def test_parse_batch_dir_failed( + csv_parse_iot_records_client: CSVParseIOTRecordsClient, + caplog: LogCaptureFixture, + tmp_path: Path, +): + tmp_paths = [] + for i in range(5): + new_tmp_path = tmp_path / f"random_valid_tsv_file{i}" + new_tmp_path.mkdir(parents=True, exist_ok=True) + tmp_paths.append(new_tmp_path) + + tmp_paths_str = [str(tmp_path) for tmp_path in tmp_paths] + + with caplog.at_level("ERROR"): + iot_records = csv_parse_iot_records_client.parse(tmp_paths_str) + for random_file, iot_record in zip(tmp_paths_str, iot_records): + assert iot_record is None + assert f"Failed to parse {random_file}" in caplog.text + assert "File path must be a file!"
in caplog.text diff --git a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/utils.py b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/utils.py index fa5b48d..e7980b2 100644 --- a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/utils.py +++ b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/utils.py @@ -80,10 +80,8 @@ def random_invalid_value_rows() -> list[tuple[str, ...]]: def random_invalid_datetime_and_value_rows() -> list[tuple[str, ...]]: rows = [] all_datetime_formats = [ - "%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%dT%H:%M:%S%z", "%Y-%m-%dT%H:%M%z", - "%Y-%m-%d %H:%M:%S.%f%z", "%Y-%m-%d %H:%M:%S%z", "%Y-%m-%d %H:%M%z", ] diff --git a/consumer/tests/test_deployments/__init__.py b/consumer/tests/test_deployments/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/consumer/tests/test_deployments/test_script/__init__.py b/consumer/tests/test_deployments/test_script/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/consumer/tests/test_deployments/test_script/test_main/__init__.py b/consumer/tests/test_deployments/test_script/test_main/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/consumer/tests/test_deployments/test_script/test_main/conftest.py b/consumer/tests/test_deployments/test_script/test_main/conftest.py new file mode 100644 index 0000000..1cb77e5 --- /dev/null +++ b/consumer/tests/test_deployments/test_script/test_main/conftest.py @@ -0,0 +1,170 @@ +from src.deployments.script.config import RabbitMQConfig, PostgresConfig +import pika +import pytest +import psycopg2 +from pytest import TempdirFactory +from pathlib import Path +from .utils import ( + random_csv_file, + random_tsv_file, + random_ndjson_file, + random_invalid_datetime_rows, + random_invalid_datetime_and_value_rows, + random_invalid_value_rows, + random_valid_format_rows, +) + + +@pytest.fixture(scope="session") +def setup_tempdir(tmpdir_factory: TempdirFactory) -> Path: + return Path(tmpdir_factory.mktemp("artifact")) + + +@pytest.fixture(scope="session") +def raw_postgres_psycopg2_conn_config() -> psycopg2.extensions.connection: + with psycopg2.connect( + host=PostgresConfig.HOST, + port=PostgresConfig.PORT, + user=PostgresConfig.USERNAME, + password=PostgresConfig.PASSWORD, + database=PostgresConfig.DATABASE, + ) as conn: + yield conn + + +@pytest.fixture(scope="session") +def raw_rabbitmq_pika_conn_config() -> tuple[pika.BaseConnection, str]: + pika_conn = pika.BlockingConnection( + pika.ConnectionParameters( + host=RabbitMQConfig.HOST, + port=RabbitMQConfig.PORT, + credentials=pika.PlainCredentials( + RabbitMQConfig.USERNAME, RabbitMQConfig.PASSWORD + ), + ) + ) + return pika_conn, RabbitMQConfig.QUEUE + + +@pytest.fixture(scope="session", autouse=True) +def init_postgres_tables( + raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, +) -> None: + with raw_postgres_psycopg2_conn_config.cursor() as cursor: + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS records ( + record_time TIMESTAMPTZ NOT NULL, + sensor_id TEXT NOT NULL, + value DOUBLE PRECISION NOT NULL, + PRIMARY KEY(record_time, sensor_id) + ); + + CREATE INDEX IF NOT EXISTS idx_records_record_time ON records USING BRIN (record_time); + CREATE INDEX IF NOT EXISTS idx_records_sensor_id ON records USING HASH (sensor_id); + """ + ) + raw_postgres_psycopg2_conn_config.commit() + + +@pytest.fixture(scope="function", autouse=True) +def setup_teardown_rabbitmq_queue( + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, 
str], +) -> None: + pika_conn, queue = raw_rabbitmq_pika_conn_config + + channel = pika_conn.channel() + channel.queue_declare(queue=queue, durable=True) + channel.queue_purge(queue=queue) + yield + channel.queue_purge(queue=queue) + + +@pytest.fixture(scope="function", autouse=True) +def setup_teardown_postgres_tables( + raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, +) -> None: + with raw_postgres_psycopg2_conn_config.cursor() as cursor: + try: + cursor.execute( + """ + TRUNCATE TABLE records; + """ + ) + raw_postgres_psycopg2_conn_config.commit() + yield + except Exception as e: + raw_postgres_psycopg2_conn_config.rollback() + raise e + finally: + cursor.execute( + """ + TRUNCATE TABLE records; + """ + ) + raw_postgres_psycopg2_conn_config.commit() + + +@pytest.fixture(scope="session") +def setup_tempdir(tmpdir_factory: TempdirFactory) -> Path: + return Path(tmpdir_factory.mktemp("artifact")) + + +@pytest.fixture(scope="function") +def random_valid_csv_file(setup_tempdir: Path) -> Path: + return random_csv_file(setup_tempdir, random_valid_format_rows()) + + +@pytest.fixture(scope="function") +def random_invalid_datetime_and_value_csv_file(setup_tempdir: Path) -> Path: + return random_csv_file(setup_tempdir, random_invalid_datetime_and_value_rows()) + + +@pytest.fixture(scope="function") +def random_invalid_datetime_csv_file(setup_tempdir: Path) -> Path: + return random_csv_file(setup_tempdir, random_invalid_datetime_rows()) + + +@pytest.fixture(scope="function") +def random_invalid_value_csv_file(setup_tempdir: Path) -> Path: + return random_csv_file(setup_tempdir, random_invalid_value_rows()) + + +@pytest.fixture(scope="function") +def random_valid_tsv_file(setup_tempdir: Path) -> Path: + return random_tsv_file(setup_tempdir, random_valid_format_rows()) + + +@pytest.fixture(scope="function") +def random_invalid_datetime_and_value_tsv_file(setup_tempdir: Path) -> Path: + return random_tsv_file(setup_tempdir, random_invalid_datetime_and_value_rows()) + + +@pytest.fixture(scope="function") +def random_invalid_datetime_tsv_file(setup_tempdir: Path) -> Path: + return random_tsv_file(setup_tempdir, random_invalid_datetime_rows()) + + +@pytest.fixture(scope="function") +def random_invalid_value_tsv_file(setup_tempdir: Path) -> Path: + return random_tsv_file(setup_tempdir, random_invalid_value_rows()) + + +@pytest.fixture(scope="function") +def random_valid_ndjson_file(setup_tempdir: Path) -> Path: + return random_ndjson_file(setup_tempdir, random_valid_format_rows()) + + +@pytest.fixture(scope="function") +def random_invalid_datetime_and_value_ndjson_file(setup_tempdir: Path) -> Path: + return random_ndjson_file(setup_tempdir, random_invalid_datetime_and_value_rows()) + + +@pytest.fixture(scope="function") +def random_invalid_datetime_ndjson_file(setup_tempdir: Path) -> Path: + return random_ndjson_file(setup_tempdir, random_invalid_datetime_rows()) + + +@pytest.fixture(scope="function") +def random_invalid_value_ndjson_file(setup_tempdir: Path) -> Path: + return random_ndjson_file(setup_tempdir, random_invalid_value_rows()) diff --git a/consumer/tests/test_deployments/test_script/test_main/test_main_failed_read_stream_raise.py b/consumer/tests/test_deployments/test_script/test_main/test_main_failed_read_stream_raise.py new file mode 100644 index 0000000..7c598c8 --- /dev/null +++ b/consumer/tests/test_deployments/test_script/test_main/test_main_failed_read_stream_raise.py @@ -0,0 +1,48 @@ +from typing import Iterator +from src.deployments.script.main import main +from 
src.deployments.script.config import RabbitMQConfig +from src.adapters.fetch_filenames_stream.rabbitmq import ( + RabbitMQFetchFilenameStreamClient, +) +import pytest +from pytest import MonkeyPatch, FixtureRequest +import pika + + +@pytest.mark.smoke +@pytest.mark.parametrize( + "fixture_name", + ["random_valid_csv_file"] * 5, +) +def test_main_flow_single_read_stream_failed_raise( + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + fixture_name: str, + request: FixtureRequest, + monkeypatch: MonkeyPatch, +): + random_csv_file: str = request.getfixturevalue(fixture_name) + + conn, queue = raw_rabbitmq_pika_conn_config + channel = conn.channel() + channel.queue_declare(queue=queue, durable=True) + + channel.basic_publish( + exchange="", + routing_key=queue, + body=random_csv_file, + properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), + ) + + def mock_read(self, *args, **kwargs) -> Iterator[str]: + raise IOError("Cannot read stream!") + + monkeypatch.setattr(RabbitMQFetchFilenameStreamClient, "fetch_stream", mock_read) + + monkeypatch.setattr(RabbitMQConfig, "POLLING_TIMEOUT", 1) + + with pytest.raises(IOError, match="^Cannot read stream!$"): + main() + + method_frame, _, body = channel.basic_get(queue=queue) + assert method_frame is not None + assert body.decode() == random_csv_file diff --git a/consumer/tests/test_deployments/test_script/test_main/test_main_read_file_resilience.py b/consumer/tests/test_deployments/test_script/test_main/test_main_read_file_resilience.py new file mode 100644 index 0000000..6883813 --- /dev/null +++ b/consumer/tests/test_deployments/test_script/test_main/test_main_read_file_resilience.py @@ -0,0 +1,87 @@ +from src.deployments.script.main import main +from src.deployments.script.config import RabbitMQConfig +import pytest +from pytest import MonkeyPatch, LogCaptureFixture, FixtureRequest +import pika +import psycopg2 +import csv +from datetime import datetime +from decimal import Decimal + + +@pytest.mark.parametrize( + "fixture_name", + ["random_valid_csv_file"] * 5, +) +def test_main_flow_single_cannot_read_file_throw_error( + raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + fixture_name: str, + request: FixtureRequest, + monkeypatch: MonkeyPatch, + caplog: LogCaptureFixture, +): + random_csv_file: str = request.getfixturevalue(fixture_name) + + conn, queue = raw_rabbitmq_pika_conn_config + channel = conn.channel() + channel.queue_declare(queue=queue, durable=True) + + channel.basic_publish( + exchange="", + routing_key=queue, + body=random_csv_file, + properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), + ) + + monkeypatch.setattr(RabbitMQConfig, "POLLING_TIMEOUT", 1) + + counter = 0 + + def mock_open(*args, **kwargs): + nonlocal counter + counter += 1 + if counter == 1: + monkeypatch.undo() + raise OSError("Cannot read file!") + + monkeypatch.setattr("builtins.open", mock_open) + + with caplog.at_level("INFO"): + main() + assert "Cannot read file!" in caplog.text + assert f"Failed to upsert {random_csv_file}!" in caplog.text + assert f"Successfully upserted {random_csv_file}!" 
in caplog.text + + with open(random_csv_file, "r") as f: + reader = csv.reader(f) + for row in reader: + record_time, sensor_id, value = row + + record_time_dt = datetime.fromisoformat(record_time) + value_dec = Decimal(value) + + with raw_postgres_psycopg2_conn_config.cursor() as cursor: + cursor.execute( + """ + SELECT record_time, sensor_id, value + FROM records + WHERE record_time = %s AND sensor_id = %s; + """, + (record_time_dt, sensor_id), + ) + + ( + fetched_record_time, + fetched_sensor_id, + fetched_value, + ) = cursor.fetchone() + + assert fetched_record_time == record_time_dt + assert fetched_sensor_id == sensor_id + assert pytest.approx(value_dec) == fetched_value + + method_frame, header_frame, body = channel.basic_get(queue=queue) + assert method_frame is None + assert header_frame is None + assert body is None diff --git a/consumer/tests/test_deployments/test_script/test_main/test_main_successful.py b/consumer/tests/test_deployments/test_script/test_main/test_main_successful.py new file mode 100644 index 0000000..e140c9b --- /dev/null +++ b/consumer/tests/test_deployments/test_script/test_main/test_main_successful.py @@ -0,0 +1,279 @@ +from src.deployments.script.main import main +from src.deployments.script.config import RabbitMQConfig, PostgresConfig +import pytest +from pytest import MonkeyPatch, LogCaptureFixture, FixtureRequest +import pika +import psycopg2 +import csv +from datetime import datetime +from decimal import Decimal + + +@pytest.mark.parametrize( + "fixture_name", + ["random_valid_csv_file"] * 5, +) +def test_main_flow_single_no_failed_files( + raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + fixture_name: str, + request: FixtureRequest, + monkeypatch: MonkeyPatch, + caplog: LogCaptureFixture, +): + random_csv_file: str = request.getfixturevalue(fixture_name) + + conn, queue = raw_rabbitmq_pika_conn_config + channel = conn.channel() + channel.queue_declare(queue=queue, durable=True) + + channel.basic_publish( + exchange="", + routing_key=queue, + body=random_csv_file, + properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), + ) + + monkeypatch.setattr(RabbitMQConfig, "POLLING_TIMEOUT", 1) + + with caplog.at_level("INFO"): + main() + assert f"Successfully upserted {random_csv_file}!" 
in caplog.text + + with open(random_csv_file, "r") as f: + reader = csv.reader(f) + for row in reader: + record_time, sensor_id, value = row + + record_time_dt = datetime.fromisoformat(record_time) + value_dec = Decimal(value) + + with raw_postgres_psycopg2_conn_config.cursor() as cursor: + cursor.execute( + """ + SELECT record_time, sensor_id, value + FROM records + WHERE record_time = %s AND sensor_id = %s; + """, + (record_time_dt, sensor_id), + ) + + ( + fetched_record_time, + fetched_sensor_id, + fetched_value, + ) = cursor.fetchone() + + assert fetched_record_time == record_time_dt + assert fetched_sensor_id == sensor_id + assert pytest.approx(value_dec) == fetched_value + + method_frame, header_frame, body = channel.basic_get(queue=queue) + assert method_frame is None + assert header_frame is None + assert body is None + + +@pytest.mark.parametrize( + "fixture_names", + [tuple(["random_valid_csv_file"] * 5) for _ in range(5)], +) +def test_main_flow_batch_no_failed_files( + raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + fixture_names: tuple[str, ...], + request: FixtureRequest, + monkeypatch: MonkeyPatch, + caplog: LogCaptureFixture, +): + random_csv_files: list[str] = [ + request.getfixturevalue(fixture_name) for fixture_name in fixture_names + ] + + conn, queue = raw_rabbitmq_pika_conn_config + channel = conn.channel() + channel.queue_declare(queue=queue, durable=True) + + for random_csv_file in random_csv_files: + channel.basic_publish( + exchange="", + routing_key=queue, + body=random_csv_file, + properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), + ) + + monkeypatch.setattr(RabbitMQConfig, "POLLING_TIMEOUT", 1) + + with caplog.at_level("INFO"): + main() + assert f"Successfully upserted {random_csv_file}!" 
in caplog.text + + for random_csv_file in random_csv_files: + with open(random_csv_file, "r") as f: + reader = csv.reader(f) + for row in reader: + record_time, sensor_id, value = row + + record_time_dt = datetime.fromisoformat(record_time) + value_dec = Decimal(value) + + with raw_postgres_psycopg2_conn_config.cursor() as cursor: + cursor.execute( + """ + SELECT record_time, sensor_id, value + FROM records + WHERE record_time = %s AND sensor_id = %s; + """, + (record_time_dt, sensor_id), + ) + + ( + fetched_record_time, + fetched_sensor_id, + fetched_value, + ) = cursor.fetchone() + + assert fetched_record_time == record_time_dt + assert fetched_sensor_id == sensor_id + assert pytest.approx(value_dec) == fetched_value + + method_frame, header_frame, body = channel.basic_get(queue=queue) + assert method_frame is None + assert header_frame is None + assert body is None + + +@pytest.mark.parametrize( + "fixture_name", + ["random_valid_csv_file"] * 5, +) +def test_main_flow_single_in_batch_no_failed_files( + raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + fixture_name: str, + request: FixtureRequest, + monkeypatch: MonkeyPatch, + caplog: LogCaptureFixture, +): + random_csv_file: str = request.getfixturevalue(fixture_name) + + conn, queue = raw_rabbitmq_pika_conn_config + channel = conn.channel() + channel.queue_declare(queue=queue, durable=True) + + channel.basic_publish( + exchange="", + routing_key=queue, + body=random_csv_file, + properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), + ) + + monkeypatch.setattr(RabbitMQConfig, "POLLING_TIMEOUT", 1) + + monkeypatch.setattr(PostgresConfig, "BATCH_UPSERT_SIZE", 3) + + with caplog.at_level("INFO"): + main() + assert f"Successfully upserted {random_csv_file}!" 
in caplog.text + + with open(random_csv_file, "r") as f: + reader = csv.reader(f) + for row in reader: + record_time, sensor_id, value = row + + record_time_dt = datetime.fromisoformat(record_time) + value_dec = Decimal(value) + + with raw_postgres_psycopg2_conn_config.cursor() as cursor: + cursor.execute( + """ + SELECT record_time, sensor_id, value + FROM records + WHERE record_time = %s AND sensor_id = %s; + """, + (record_time_dt, sensor_id), + ) + + result = cursor.fetchone() + + fetched_record_time, fetched_sensor_id, fetched_value = result + + assert fetched_record_time == record_time_dt + assert fetched_sensor_id == sensor_id + assert pytest.approx(value_dec) == fetched_value + + method_frame, header_frame, body = channel.basic_get(queue=queue) + assert method_frame is None + assert header_frame is None + assert body is None + + +@pytest.mark.parametrize( + "fixture_names", + [tuple(["random_valid_csv_file"] * 5) for _ in range(5)], +) +def test_main_flow_batch_in_batch_no_failed_files( + raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + fixture_names: tuple[str, ...], + request: FixtureRequest, + monkeypatch: MonkeyPatch, + caplog: LogCaptureFixture, +): + random_csv_files: list[str] = [ + request.getfixturevalue(fixture_name) for fixture_name in fixture_names + ] + + conn, queue = raw_rabbitmq_pika_conn_config + channel = conn.channel() + channel.queue_declare(queue=queue, durable=True) + + for random_csv_file in random_csv_files: + channel.basic_publish( + exchange="", + routing_key=queue, + body=random_csv_file, + properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), + ) + + monkeypatch.setattr(RabbitMQConfig, "POLLING_TIMEOUT", 1) + + monkeypatch.setattr(PostgresConfig, "BATCH_UPSERT_SIZE", 2) + + with caplog.at_level("INFO"): + main() + assert f"Successfully upserted {random_csv_file}!" 
in caplog.text + + for random_csv_file in random_csv_files: + with open(random_csv_file, "r") as f: + reader = csv.reader(f) + for row in reader: + record_time, sensor_id, value = row + + record_time_dt = datetime.fromisoformat(record_time) + value_dec = Decimal(value) + + with raw_postgres_psycopg2_conn_config.cursor() as cursor: + cursor.execute( + """ + SELECT record_time, sensor_id, value + FROM records + WHERE record_time = %s AND sensor_id = %s; + """, + (record_time_dt, sensor_id), + ) + + ( + fetched_record_time, + fetched_sensor_id, + fetched_value, + ) = cursor.fetchone() + + assert fetched_record_time == record_time_dt + assert fetched_sensor_id == sensor_id + assert pytest.approx(value_dec) == fetched_value + + method_frame, header_frame, body = channel.basic_get(queue=queue) + assert method_frame is None + assert header_frame is None + assert body is None diff --git a/consumer/tests/test_deployments/test_script/test_main/test_main_upsert_record_resilience.py b/consumer/tests/test_deployments/test_script/test_main/test_main_upsert_record_resilience.py new file mode 100644 index 0000000..5fab317 --- /dev/null +++ b/consumer/tests/test_deployments/test_script/test_main/test_main_upsert_record_resilience.py @@ -0,0 +1,168 @@ +from src.deployments.script.main import main +from src.deployments.script.config import RabbitMQConfig +import pytest +from pytest import MonkeyPatch, LogCaptureFixture, FixtureRequest +from src.adapters.upsert_iot_records.postgres import PostgresUpsertIOTRecordsClient +import pika +import psycopg2 +import csv +from datetime import datetime +from decimal import Decimal + + +@pytest.mark.parametrize( + "fixture_name", + ["random_valid_csv_file"] * 5, +) +def test_main_flow_single_upsert_record_failed_resilience( + raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + fixture_name: str, + request: FixtureRequest, + monkeypatch: MonkeyPatch, + caplog: LogCaptureFixture, +): + random_csv_file: str = request.getfixturevalue(fixture_name) + + conn, queue = raw_rabbitmq_pika_conn_config + channel = conn.channel() + channel.queue_declare(queue=queue, durable=True) + + channel.basic_publish( + exchange="", + routing_key=queue, + body=random_csv_file, + properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), + ) + + monkeypatch.setattr(RabbitMQConfig, "POLLING_TIMEOUT", 1) + + counter = 0 + + def mock_upsert(*args, **kwargs): + nonlocal counter + counter += 1 + if counter == 1: + monkeypatch.undo() + raise Exception("Failed to upsert!") + + monkeypatch.setattr( + "src.deployments.script.main._upsert_iot_records_buffer", mock_upsert + ) + + with caplog.at_level("INFO"): + main() + assert "Failed to upsert!" in caplog.text + assert f"Failed to upsert {random_csv_file}!" in caplog.text + assert f"Successfully upserted {random_csv_file}!" 
in caplog.text + + with open(random_csv_file, "r") as f: + reader = csv.reader(f) + for row in reader: + record_time, sensor_id, value = row + + record_time_dt = datetime.fromisoformat(record_time) + value_dec = Decimal(value) + + with raw_postgres_psycopg2_conn_config.cursor() as cursor: + cursor.execute( + """ + SELECT record_time, sensor_id, value + FROM records + WHERE record_time = %s AND sensor_id = %s; + """, + (record_time_dt, sensor_id), + ) + + ( + fetched_record_time, + fetched_sensor_id, + fetched_value, + ) = cursor.fetchone() + + assert fetched_record_time == record_time_dt + assert fetched_sensor_id == sensor_id + assert pytest.approx(value_dec) == fetched_value + + method_frame, header_frame, body = channel.basic_get(queue=queue) + assert method_frame is None + assert header_frame is None + assert body is None + + +@pytest.mark.parametrize( + "fixture_name", + ["random_valid_csv_file"] * 5, +) +def test_main_flow_single_upsert_step_failed_resilience( + raw_postgres_psycopg2_conn_config: psycopg2.extensions.connection, + raw_rabbitmq_pika_conn_config: tuple[pika.BaseConnection, str], + fixture_name: str, + request: FixtureRequest, + monkeypatch: MonkeyPatch, + caplog: LogCaptureFixture, +): + random_csv_file: str = request.getfixturevalue(fixture_name) + + conn, queue = raw_rabbitmq_pika_conn_config + channel = conn.channel() + channel.queue_declare(queue=queue, durable=True) + + channel.basic_publish( + exchange="", + routing_key=queue, + body=random_csv_file, + properties=pika.BasicProperties(delivery_mode=pika.DeliveryMode.Persistent), + ) + + monkeypatch.setattr(RabbitMQConfig, "POLLING_TIMEOUT", 1) + + counter = 0 + + def mock_upsert(self, records) -> list[bool]: + nonlocal counter + counter += 1 + if counter == 1: + monkeypatch.undo() + return [False] * len(records) + + monkeypatch.setattr(PostgresUpsertIOTRecordsClient, "_upsert_batch", mock_upsert) + + with caplog.at_level("INFO"): + main() + assert "Failed to upsert all records!" in caplog.text + assert f"Failed to upsert {random_csv_file}!" in caplog.text + assert f"Successfully upserted {random_csv_file}!" 
in caplog.text + + with open(random_csv_file, "r") as f: + reader = csv.reader(f) + for row in reader: + record_time, sensor_id, value = row + + record_time_dt = datetime.fromisoformat(record_time) + value_dec = Decimal(value) + + with raw_postgres_psycopg2_conn_config.cursor() as cursor: + cursor.execute( + """ + SELECT record_time, sensor_id, value + FROM records + WHERE record_time = %s AND sensor_id = %s; + """, + (record_time_dt, sensor_id), + ) + + ( + fetched_record_time, + fetched_sensor_id, + fetched_value, + ) = cursor.fetchone() + + assert fetched_record_time == record_time_dt + assert fetched_sensor_id == sensor_id + assert pytest.approx(value_dec) == fetched_value + + method_frame, header_frame, body = channel.basic_get(queue=queue) + assert method_frame is None + assert header_frame is None + assert body is None diff --git a/consumer/tests/test_deployments/test_script/test_main/utils.py b/consumer/tests/test_deployments/test_script/test_main/utils.py new file mode 100644 index 0000000..56fce36 --- /dev/null +++ b/consumer/tests/test_deployments/test_script/test_main/utils.py @@ -0,0 +1,134 @@ +import random +import string +from pathlib import Path +from datetime import datetime, timedelta +from zoneinfo import ZoneInfo +import zoneinfo +import random +import json +from decimal import Decimal +import csv + + +def random_valid_format_rows() -> list[tuple[str, ...]]: + rows = [] + for _ in range(10): + random_timezone = random.choice(list(zoneinfo.available_timezones())) + random_time_delta = timedelta( + hours=random.randint(0, 24), + minutes=random.randint(0, 60), + seconds=random.randint(0, 60), + ) + random_datetime = datetime.now(tz=ZoneInfo(random_timezone)) - random_time_delta + random_sensor_id = "".join(random.choices(string.ascii_letters, k=10)) + random_value = Decimal(random.random() * 100) + rows.append((random_datetime.isoformat(), random_sensor_id, str(random_value))) + return rows + + +def random_invalid_datetime_rows() -> list[tuple[str, ...]]: + rows = [] + all_datetime_formats = [ + "%Y-%m-%dT%H:%M:%S%z", + "%Y-%m-%dT%H:%M%z", + "%Y-%m-%d %H:%M:%S%z", + "%Y-%m-%d %H:%M%z", + ] + for _ in range(10): + random_timezone = random.choice(list(zoneinfo.available_timezones())) + random_time_delta = timedelta( + hours=random.randint(0, 24), + minutes=random.randint(0, 60), + seconds=random.randint(0, 60), + ) + random_datetime = datetime.now(tz=ZoneInfo(random_timezone)) - random_time_delta + random_sensor_id = "".join(random.choices(string.ascii_letters, k=10)) + random_value = Decimal(random.random() * 100) + random_datetime_format = random.choice(all_datetime_formats) + rows.append( + ( + random_datetime.strftime(random_datetime_format), + random_sensor_id, + str(random_value), + ) + ) + return rows + + +def random_invalid_value_rows() -> list[tuple[str, ...]]: + rows = [] + for _ in range(10): + random_timezone = random.choice(list(zoneinfo.available_timezones())) + random_time_delta = timedelta( + hours=random.randint(0, 24), + minutes=random.randint(0, 60), + seconds=random.randint(0, 60), + ) + random_datetime = datetime.now(tz=ZoneInfo(random_timezone)) - random_time_delta + random_sensor_id = "".join(random.choices(string.ascii_letters, k=10)) + random_value = "".join(random.choices(string.ascii_letters, k=10)) + rows.append( + ( + random_datetime.isoformat(), + random_sensor_id, + random_value, + ) + ) + return rows + + +def random_invalid_datetime_and_value_rows() -> list[tuple[str, ...]]: + rows = [] + all_datetime_formats = [ + "%Y-%m-%dT%H:%M:%S%z", + 
"%Y-%m-%dT%H:%M%z", + "%Y-%m-%d %H:%M:%S%z", + "%Y-%m-%d %H:%M%z", + ] + for _ in range(10): + random_timezone = random.choice(list(zoneinfo.available_timezones())) + random_time_delta = timedelta( + hours=random.randint(0, 24), + minutes=random.randint(0, 60), + seconds=random.randint(0, 60), + ) + random_datetime = datetime.now(tz=ZoneInfo(random_timezone)) - random_time_delta + random_sensor_id = "".join(random.choices(string.ascii_letters, k=10)) + random_value = "".join(random.choices(string.ascii_letters, k=10)) + random_datetime_format = random.choice(all_datetime_formats) + rows.append( + ( + random_datetime.strftime(random_datetime_format), + random_sensor_id, + str(random_value), + ) + ) + return rows + + +def random_csv_file(base_dir: Path, rows: list[tuple[str, ...]]) -> str: + filename = "".join(random.choices(string.ascii_letters, k=10)) + ".csv" + filepath = base_dir.joinpath(filename) + with open(filepath, "w") as csvfile: + writer = csv.writer(csvfile, delimiter=",") + writer.writerows(rows) + return str(filepath) + + +def random_tsv_file(base_dir: Path, rows: list[tuple[str, ...]]) -> str: + filename = "".join(random.choices(string.ascii_letters, k=10)) + ".tsv" + filepath = base_dir.joinpath(filename) + with open(filepath, "w") as csvfile: + writer = csv.writer(csvfile, delimiter="\t") + writer.writerows(rows) + return str(filepath) + + +def random_ndjson_file(base_dir: Path, rows: list[tuple[str, ...]]) -> str: + filename = "".join(random.choices(string.ascii_letters, k=10)) + ".tsv" + filepath = base_dir.joinpath(filename) + with open(filepath, "w") as csvfile: + for row in rows: + json.dump(row, csvfile) + csvfile.write("\n") + return str(filepath) diff --git a/docker-compose.yml b/docker-compose.yml index fd4a64f..b19bfce 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -75,3 +75,6 @@ services: CSV_PARSER_RECOGNIZED_DATETIME_FORMATS: ${CSV_PARSER_RECOGNIZED_DATETIME_FORMATS} CSV_PARSER_DELIMITER: ${CSV_PARSER_DELIMITER} CSV_PARSER_FILE_EXTENSION: ${CSV_PARSER_FILE_EXTENSION} + deploy: + mode: replicated + replicas: ${CONSUMER_REPLICAS} diff --git a/test_generator.py b/test_generator.py new file mode 100644 index 0000000..58b64be --- /dev/null +++ b/test_generator.py @@ -0,0 +1,7 @@ +import argparse +import csv +import datetime + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser From ea6b1241238fbee18326e257aa83c764896dfa71 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 3 Dec 2023 10:50:50 +0000 Subject: [PATCH 33/36] Updated coverage.svg --- coverage.svg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/coverage.svg b/coverage.svg index 3438732..b6c4e36 100644 --- a/coverage.svg +++ b/coverage.svg @@ -15,7 +15,7 @@ coverage coverage - 97% - 97% + 95% + 95% From fa557c5cff67e0ecdea8d1b03a4b0a5c03eb1a75 Mon Sep 17 00:00:00 2001 From: alexau Date: Sun, 3 Dec 2023 20:04:05 +0800 Subject: [PATCH 34/36] Updated the deps --- .env | 8 ++- .gitignore | 1 + Makefile | 23 ++++--- README.md | 47 +++++++++++++ consumer/dockerfile | 40 ++++++----- consumer/poetry.lock | 2 +- consumer/pyproject.toml | 1 + consumer/requirements.txt | 1 + docker-compose.yml | 35 ++++++++-- producer/dockerfile | 38 ++++++----- producer/poetry.lock | 2 +- producer/pyproject.toml | 1 + producer/requirements.txt | 1 + test_generator.py | 138 +++++++++++++++++++++++++++++++++++++- 14 files changed, 284 insertions(+), 54 deletions(-) diff --git a/.env b/.env index ee2c7c9..4bb87c8 100644 --- a/.env +++ b/.env @@ -32,8 +32,14 @@ 
CONSUMER_LOG_DATE_FORMAT="%Y-%m-%d %H:%M:%S" CONSUMER_LOG_DIR=./logs/producer CONSUMER_LOG_RETENTION=7 CONSUMER_LOG_ROTATION=midnight -CONSUMER_REPLICAS=16 +CONSUMER_REPLICAS=2 CSV_PARSER_RECOGNIZED_DATETIME_FORMATS="%Y-%m-%dT%H:%M:%S.%f%z" CSV_PARSER_DELIMITER="," CSV_PARSER_FILE_EXTENSION=.csv + +GEN_NUM_SENSORS=1000 +GEN_NUM_RECORDS=100000 +GEN_START_DATE=2021-01-01 +GEN_RECORD_INTERVAL=5 +GEN_TIMEZONE=Asia/Hong_Kong diff --git a/.gitignore b/.gitignore index 366f53c..f8663b2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .pytest_cache __pycache__ .mypy_cache +records diff --git a/Makefile b/Makefile index 78c985f..9082133 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,7 @@ POSTGRES_HOST=localhost RABBITMQ_HOST=localhost build: + docker compose pull --ignore-buildable docker compose build up: docker compose up @@ -13,32 +14,27 @@ down: docker compose down stats: docker stats --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.MemPerc}}\t{{.NetIO}}\t{{.BlockIO}}\t{{.PIDs}}" -export_requirements: - cd producer && \ - poetry export -f requirements.txt --output requirements.txt --without-hashes && \ - cd ../consumer && \ - poetry export -f requirements.txt --output requirements.txt --without-hashes setup_test_env: docker compose -f docker-compose.test.yml up -d test_producer: - export POSTGRES_HOST=localhost && \ + export POSTGRES_HOST=$(POSTGRES_HOST) && \ export POSTGRES_PORT=$(POSTGRES_PORT) && \ export POSTGRES_USERNAME=$(POSTGRES_USERNAME) && \ export POSTGRES_PASSWORD=$(POSTGRES_PASSWORD) && \ export POSTGRES_DATABASE=$(POSTGRES_DATABASE) && \ - export RABBITMQ_HOST=localhost && \ + export RABBITMQ_HOST=$(RABBITMQ_HOST) && \ export RABBITMQ_PORT=$(RABBITMQ_PORT) && \ export RABBITMQ_USERNAME=$(RABBITMQ_USERNAME) && \ export RABBITMQ_PASSWORD=$(RABBITMQ_PASSWORD) && \ export QUEUE_NAME=$(QUEUE_NAME) && \ COVERAGE_FILE=.coverage_producer coverage run -m pytest -vx producer/tests test_consumer: - export POSTGRES_HOST=localhost && \ + export POSTGRES_HOST=$(POSTGRES_HOST) && \ export POSTGRES_PORT=$(POSTGRES_PORT) && \ export POSTGRES_USERNAME=$(POSTGRES_USERNAME) && \ export POSTGRES_PASSWORD=$(POSTGRES_PASSWORD) && \ export POSTGRES_DATABASE=$(POSTGRES_DATABASE) && \ - export RABBITMQ_HOST=localhost && \ + export RABBITMQ_HOST=$(RABBITMQ_HOST) && \ export RABBITMQ_PORT=$(RABBITMQ_PORT) && \ export RABBITMQ_USERNAME=$(RABBITMQ_USERNAME) && \ export RABBITMQ_PASSWORD=$(RABBITMQ_PASSWORD) && \ @@ -50,3 +46,12 @@ coverage_report: coverage combine .coverage_producer .coverage_consumer && \ coverage report -m --omit="*/tests/*" test: test_producer test_consumer coverage_report + +generate_csv: + python test_generator.py \ + --num-sensors $(GEN_NUM_SENSORS) \ + --num-records $(GEN_NUM_RECORDS) \ + --record-interval $(GEN_RECORD_INTERVAL) \ + --start-date $(GEN_START_DATE) \ + --timezone $(GEN_TIMEZONE) \ + --dir $(TARGET_FILE_DIR) diff --git a/README.md b/README.md index dc121e5..6b7a7c8 100644 --- a/README.md +++ b/README.md @@ -2,3 +2,50 @@ ![Build Status](https://github.com/github/docs/actions/workflows/test.yml/badge.svg) ![Code Coverage](./coverage.svg) + +## Description +This is a simple producer consumer application that reads a csv file and writes the data to a database. The application is written in Python. + +## Installation +1. Clone the repository +2. 
Install make and docker +- For Ubuntu and Debian +```bash +sudo apt install make +``` +- For Fedora and CentOS +```bash +sudo yum install make +``` + +For Docker installation, please refer to the [official documentation](https://docs.docker.com/engine/install/) + +## Usage + +### Postgres in Docker +If you don't have a postgres database running, you can use the docker image provided in this repository. The docker image is based on the official postgres docker image. The docker image is configured to create a database and a user with the following .env variables: + +- `POSTGRES_VERSION_TAG` +- `POSTGRES_PORT` +- `POSTGRES_USERNAME` +- `POSTGRES_PASSWORD` +- `POSTGRES_DATABASE` + +Please make sure you don't change the name of the variables as they are used in the python application as well. + +### Running the application + +1. Run the following command to build the docker image +```bash +$ make build +``` +2. Run the following command to start the docker compose stack +```bash +$ make up / make up_d +``` +The `up_d` command will run the container in detached mode. + +3. Run the following command to stop the docker compose stack +```bash +$ make down +``` diff --git a/consumer/dockerfile b/consumer/dockerfile index b343433..0642ced 100644 --- a/consumer/dockerfile +++ b/consumer/dockerfile @@ -1,36 +1,40 @@ ARG AMAZON_LINUX_VERSION_TAG FROM amazonlinux:${AMAZON_LINUX_VERSION_TAG} as build -RUN yum update -y && \ - yum install -y \ +RUN yum install -y \ python3.11 \ - python3-pip \ - python3-devel \ - shadow-utils && \ - yum clean all + python3.11-pip \ + python3.11-devel \ + shadow-utils RUN adduser app - -USER app ENV HOME=/home/app WORKDIR ${HOME} +RUN chown -R app:app /home/app + +USER app + COPY requirements.txt . -RUN pip3 install --user -r requirements.txt -FROM amazonlinux:${AMAZON_LINUX_VERSION_TAG} as runtime -RUN yum update -y && \ - yum install -y \ +RUN python3.11 -m pip install --user --no-warn-script-location -r requirements.txt + +FROM amazonlinux:2023.2.20231026.0 as runtime + +RUN yum install -y \ python3.11 \ - python3-pip \ - shadow-utils && \ - yum clean all + python3.11-pip \ + shadow-utils + RUN adduser app +WORKDIR /home/app + +RUN chown -R app:app /home/app USER app ENV HOME=/home/app WORKDIR ${HOME} -COPY --from=build ${HOME}/.local ${HOME}/.local +COPY --from=build /home/app/.local /home/app/.local -COPY src/ . 
-CMD python3.11 -m deployments.script.main +COPY src ./src/ +CMD python3.11 -m src.deployments.script.main diff --git a/consumer/poetry.lock b/consumer/poetry.lock index 04b864c..19cac3a 100644 --- a/consumer/poetry.lock +++ b/consumer/poetry.lock @@ -311,4 +311,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "bdaa71e5b91e5b90cc4da6748f061c503660fe30c28baebe08774e02404358dc" +content-hash = "bd2f59ee5a762770c523b8a19a3db5f2e2d8df1e9f4560580855200cea697fed" diff --git a/consumer/pyproject.toml b/consumer/pyproject.toml index ba0a9b6..8084943 100644 --- a/consumer/pyproject.toml +++ b/consumer/pyproject.toml @@ -10,6 +10,7 @@ python = "^3.11" pika = "^1.3.2" psycopg2-binary = "^2.9.9" types-psycopg2 = "^2.9.21.19" +typing-extensions = "^4.8.0" [tool.poetry.group.dev.dependencies] diff --git a/consumer/requirements.txt b/consumer/requirements.txt index 6bfaac6..ec532dc 100644 --- a/consumer/requirements.txt +++ b/consumer/requirements.txt @@ -1,3 +1,4 @@ pika==1.3.2 ; python_version >= "3.11" and python_version < "4.0" psycopg2-binary==2.9.9 ; python_version >= "3.11" and python_version < "4.0" types-psycopg2==2.9.21.19 ; python_version >= "3.11" and python_version < "4.0" +typing-extensions==4.8.0 ; python_version >= "3.11" and python_version < "4.0" diff --git a/docker-compose.yml b/docker-compose.yml index b19bfce..f57054c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,6 +15,13 @@ services: ports: - ${POSTGRES_PORT}:5432 restart: always + healthcheck: + test: ["CMD", "pg_isready", "-U", "${POSTGRES_USERNAME}"] + interval: 15s + timeout: 10s + retries: 5 + start_period: 10s + records_rabbitmq: image: rabbitmq:${RABBITMQ_VERSION_TAG} container_name: records_rabbitmq @@ -25,6 +32,13 @@ services: - ${RABBITMQ_WEBAPP_PORT}:15672 - ${RABBITMQ_PORT}:5672 restart: always + healthcheck: + test: ["CMD", "rabbitmqctl", "status"] + interval: 15s + timeout: 10s + retries: 5 + start_period: 10s + records_producer: image: records_producer:latest container_name: records_producer @@ -44,9 +58,15 @@ services: LOG_ROTATION: ${PRODUCER_LOG_ROTATION} RABBITMQ_HOST: records_rabbitmq RABBITMQ_PORT: 5672 - RABBITMQ_USER: ${RABBITMQ_USER} + RABBITMQ_USERNAME: ${RABBITMQ_USERNAME} RABBITMQ_PASSWORD: ${RABBITMQ_PASSWORD} - RABBITMQ_QUEUE: ${QUEUE_NAME} + RABBITMQ_QUEUE_NAME: ${RABBITMQ_QUEUE_NAME} + depends_on: + records_postgres: + condition: service_healthy + records_rabbitmq: + condition: service_healthy + records_consumer: image: records_consumer:latest build: @@ -63,12 +83,12 @@ services: LOG_ROTATION: ${CONSUMER_LOG_ROTATION} RABBITMQ_HOST: records_rabbitmq RABBITMQ_PORT: 5672 - RABBITMQ_USER: ${RABBITMQ_USER} + RABBITMQ_USERNAME: ${RABBITMQ_USERNAME} RABBITMQ_PASSWORD: ${RABBITMQ_PASSWORD} - RABBITMQ_QUEUE: ${QUEUE_NAME} + RABBITMQ_QUEUE_NAME: ${RABBITMQ_QUEUE_NAME} POSTGRES_HOST: records_postgres POSTGRES_PORT: 5432 - POSTGRES_USER: ${POSTGRES_USER} + POSTGRES_USERNAME: ${POSTGRES_USERNAME} POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} POSTGRES_DATABASE: ${POSTGRES_DATABASE} POSTGRES_BATCH_UPSERT_SIZE: ${POSTGRES_BATCH_UPSERT_SIZE} @@ -78,3 +98,8 @@ services: deploy: mode: replicated replicas: ${CONSUMER_REPLICAS} + depends_on: + records_postgres: + condition: service_healthy + records_rabbitmq: + condition: service_healthy diff --git a/producer/dockerfile b/producer/dockerfile index 2de51d5..9f0a359 100644 --- a/producer/dockerfile +++ b/producer/dockerfile @@ -1,36 +1,40 @@ ARG AMAZON_LINUX_VERSION_TAG FROM amazonlinux:${AMAZON_LINUX_VERSION_TAG} as build 
-RUN yum update -y && \ - yum install -y \ +RUN yum install -y \ python3.11 \ - python3-pip \ - python3-devel \ - shadow-utils && \ - yum clean all + python3.11-devel \ + python3.11-pip \ + shadow-utils RUN adduser app - -USER app ENV HOME=/home/app WORKDIR ${HOME} +RUN chown -R app:app /home/app + +USER app + COPY requirements.txt . -RUN pip3 install --user -r requirements.txt -FROM amazonlinux:${AMAZON_LINUX_VERSION_TAG} as runtime -RUN yum update -y && \ - yum install -y \ +RUN python3.11 -m pip install --user --no-warn-script-location -r requirements.txt + +FROM amazonlinux:2023.2.20231026.0 as runtime + +RUN yum install -y \ python3.11 \ - python3-pip \ - shadow-utils && \ - yum clean all + python3.11-pip \ + shadow-utils + RUN adduser app +WORKDIR /home/app + +RUN chown -R app:app /home/app USER app ENV HOME=/home/app WORKDIR ${HOME} -COPY --from=build ${HOME}/.local ${HOME}/.local +COPY --from=build /home/app/.local /home/app/.local -COPY src . +COPY src ./src/ CMD python3.11 -m src.deployments.script.main diff --git a/producer/poetry.lock b/producer/poetry.lock index 2224d04..e98ef08 100644 --- a/producer/poetry.lock +++ b/producer/poetry.lock @@ -219,4 +219,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "efdbc1051d5965ad018cb20ee591664d4975b097fe10dc948c9a4b11126ba3d1" +content-hash = "14a72dac6b407513a28c1b3b2a0709cb2afe4f8f4bf6049dccaebb6cac3ee075" diff --git a/producer/pyproject.toml b/producer/pyproject.toml index 91d3883..7bfc447 100644 --- a/producer/pyproject.toml +++ b/producer/pyproject.toml @@ -8,6 +8,7 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.11" pika = "^1.3.2" +typing-extensions = "^4.8.0" [tool.poetry.group.dev.dependencies] diff --git a/producer/requirements.txt b/producer/requirements.txt index 68bf7d3..bba668f 100644 --- a/producer/requirements.txt +++ b/producer/requirements.txt @@ -1 +1,2 @@ pika==1.3.2 ; python_version >= "3.11" and python_version < "4.0" +typing-extensions==4.8.0 ; python_version >= "3.11" and python_version < "4.0" diff --git a/test_generator.py b/test_generator.py index 58b64be..2d54770 100644 --- a/test_generator.py +++ b/test_generator.py @@ -1,7 +1,141 @@ import argparse import csv -import datetime +from datetime import datetime, timedelta +import random +from zoneinfo import ZoneInfo +from pathlib import Path +from uuid import uuid4 +import logging +from tqdm.auto import tqdm +from concurrent.futures import ThreadPoolExecutor, as_completed +from io import StringIO +import os + +logging.basicConfig(level=logging.INFO) def parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser + parser = argparse.ArgumentParser() + parser.add_argument( + "--num-sensors", + type=int, + default=10, + help="Number of sensors to generate data for", + ) + + parser.add_argument( + "--dir", + type=str, + default="records", + help="Directory to save the generated data", + ) + + parser.add_argument( + "--num-records", + type=int, + default=100, + help="Number of records to generate for each sensor", + ) + + parser.add_argument( + "--record-interval", + type=float, + default=1, + help="Interval between records in seconds", + ) + + parser.add_argument( + "--start-date", + type=str, + default="2021-01-01", + help="Start date for the generated data (YYYY-MM-DD)", + ) + + parser.add_argument( + "--timezone", + type=str, + default="Asia/Hong_Kong", + help="Timezone for the generated data", + ) + return parser.parse_args() + + +def generate_data( + sensor_id: str, + num_records: int, + 
record_interval: int, + start_date: datetime, + base_dir: Path, +) -> None: + with (base_dir / f"{sensor_id}.csv").open("w") as f: + with StringIO() as buffer: + writer = csv.DictWriter( + buffer, fieldnames=["record_time", "sensor_id", "value"] + ) + writer.writeheader() + all_dates = [ + start_date + timedelta(seconds=i * record_interval) + for i in range(num_records) + ] + + all_random_values = [random.random() for _ in range(num_records)] + + writer.writerows( + [ + { + "record_time": date.isoformat(), + "sensor_id": sensor_id, + "value": random_value, + } + for date, random_value in zip(all_dates, all_random_values) + ] + ) + f.write(buffer.getvalue()) + + +def main( + num_sensors: int, + num_records: int, + record_interval: int, + start_date_str: str, + dir: str, + timezone: str, +) -> None: + logging.info("Generating data...") + + start_date = datetime.strptime(start_date_str, "%Y-%m-%d").replace( + tzinfo=ZoneInfo(timezone) + ) + base_dir = Path(dir) + base_dir.mkdir(exist_ok=True) + futures = [] + with tqdm(total=num_sensors) as pbar: + with ThreadPoolExecutor(max_workers=os.cpu_count() * 2) as executor: + for i in range(num_sensors): + sensor_id = f"{uuid4().hex[:8]}_{i}" + futures.append( + executor.submit( + generate_data, + sensor_id, + num_records, + record_interval, + start_date, + base_dir, + ) + ) + + for _ in as_completed(futures): + pbar.update(1) + logging.info("Done") + + +if __name__ == "__main__": + args = parse_args() + main( + args.num_sensors, + args.num_records, + args.record_interval, + args.start_date, + args.dir, + args.timezone, + ) From 60a4a6f0b4c3033d1547d1dbcf13ddd1ffc658ed Mon Sep 17 00:00:00 2001 From: alexau Date: Sun, 3 Dec 2023 20:47:31 +0800 Subject: [PATCH 35/36] udpated the test --- .env | 2 +- Makefile | 6 +- consumer/.dockerignore | 3 + .../adapters/file_parse_iot_records/csv.py | 1 + .../test_csv/utils.py | 12 ++- database/.dockerignore | 3 + docker-compose.yml | 15 +++- producer/.dockerignore | 3 + producer/dockerfile | 3 +- producer/src/deployments/script/main.py | 4 + .../src/deployments/script/setup_logging.py | 78 +++++++++---------- 11 files changed, 80 insertions(+), 50 deletions(-) create mode 100644 consumer/.dockerignore create mode 100644 database/.dockerignore create mode 100644 producer/.dockerignore diff --git a/.env b/.env index 4bb87c8..dae9792 100644 --- a/.env +++ b/.env @@ -34,7 +34,7 @@ CONSUMER_LOG_RETENTION=7 CONSUMER_LOG_ROTATION=midnight CONSUMER_REPLICAS=2 -CSV_PARSER_RECOGNIZED_DATETIME_FORMATS="%Y-%m-%dT%H:%M:%S.%f%z" +CSV_PARSER_RECOGNIZED_DATETIME_FORMATS="%Y-%m-%dT%H:%M:%S%z" CSV_PARSER_DELIMITER="," CSV_PARSER_FILE_EXTENSION=.csv diff --git a/Makefile b/Makefile index 9082133..dbc154d 100644 --- a/Makefile +++ b/Makefile @@ -7,9 +7,9 @@ build: docker compose pull --ignore-buildable docker compose build up: - docker compose up -up_d: - docker compose up -d + docker compose up -d --wait && docker compose logs -f --tail 100 records_producer records_consumer +logs: + docker compose logs -f --tail 100 records_producer records_consumer down: docker compose down stats: diff --git a/consumer/.dockerignore b/consumer/.dockerignore new file mode 100644 index 0000000..4a643f0 --- /dev/null +++ b/consumer/.dockerignore @@ -0,0 +1,3 @@ +__pycache__ +.pytest_cache +.mypy_cache diff --git a/consumer/src/adapters/file_parse_iot_records/csv.py b/consumer/src/adapters/file_parse_iot_records/csv.py index eb2f4f8..a09c10b 100644 --- a/consumer/src/adapters/file_parse_iot_records/csv.py +++ 
b/consumer/src/adapters/file_parse_iot_records/csv.py @@ -52,6 +52,7 @@ def parse_stream(self, filename: str) -> Iterator[IOTRecord]: self._basic_file_check(filename) with open(filename) as csvfile: reader = csv.reader(csvfile, delimiter=self._delimiter, strict=True) + next(reader) # skip header yield from self._parse_iter(reader) except OSError as e: logging.exception(e) diff --git a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/utils.py b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/utils.py index e7980b2..aaec3f4 100644 --- a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/utils.py +++ b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/utils.py @@ -80,7 +80,7 @@ def random_invalid_value_rows() -> list[tuple[str, ...]]: def random_invalid_datetime_and_value_rows() -> list[tuple[str, ...]]: rows = [] all_datetime_formats = [ - "%Y-%m-%dT%H:%M:%S%z", + "%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%dT%H:%M%z", "%Y-%m-%d %H:%M:%S%z", "%Y-%m-%d %H:%M%z", @@ -110,7 +110,10 @@ def random_csv_file(base_dir: Path, rows: list[tuple[str, ...]]) -> str: filename = "".join(random.choices(string.ascii_letters, k=10)) + ".csv" filepath = base_dir.joinpath(filename) with open(filepath, "w") as csvfile: - writer = csv.writer(csvfile, delimiter=",") + writer = csv.DictWriter( + csvfile, delimiter=",", fieldnames=["record_time", "sensor_id", "value"] + ) + writer.writeheader() writer.writerows(rows) return str(filepath) @@ -119,7 +122,10 @@ def random_tsv_file(base_dir: Path, rows: list[tuple[str, ...]]) -> str: filename = "".join(random.choices(string.ascii_letters, k=10)) + ".tsv" filepath = base_dir.joinpath(filename) with open(filepath, "w") as csvfile: - writer = csv.writer(csvfile, delimiter="\t") + writer = csv.DictWriter( + csvfile, delimiter="\t", fieldnames=["record_time", "sensor_id", "value"] + ) + writer.writeheader() writer.writerows(rows) return str(filepath) diff --git a/database/.dockerignore b/database/.dockerignore new file mode 100644 index 0000000..4a643f0 --- /dev/null +++ b/database/.dockerignore @@ -0,0 +1,3 @@ +__pycache__ +.pytest_cache +.mypy_cache diff --git a/docker-compose.yml b/docker-compose.yml index f57054c..4ca485c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -56,11 +56,15 @@ services: LOG_DIR: ${PRODUCER_LOG_DIR} LOG_RETENTION: ${PRODUCER_LOG_RETENTION} LOG_ROTATION: ${PRODUCER_LOG_ROTATION} - RABBITMQ_HOST: records_rabbitmq + RABBITMQ_HOST: localhost RABBITMQ_PORT: 5672 RABBITMQ_USERNAME: ${RABBITMQ_USERNAME} RABBITMQ_PASSWORD: ${RABBITMQ_PASSWORD} RABBITMQ_QUEUE_NAME: ${RABBITMQ_QUEUE_NAME} + network_mode: host + volumes: + - ./${TARGET_FILE_DIR}:/home/app/${TARGET_FILE_DIR}:ro + # - ./${PRODUCER_LOG_DIR}:/home/app/${PRODUCER_LOG_DIR} depends_on: records_postgres: condition: service_healthy @@ -81,12 +85,13 @@ services: LOG_DIR: ${CONSUMER_LOG_DIR} LOG_RETENTION: ${CONSUMER_LOG_RETENTION} LOG_ROTATION: ${CONSUMER_LOG_ROTATION} - RABBITMQ_HOST: records_rabbitmq + RABBITMQ_HOST: localhost RABBITMQ_PORT: 5672 RABBITMQ_USERNAME: ${RABBITMQ_USERNAME} RABBITMQ_PASSWORD: ${RABBITMQ_PASSWORD} RABBITMQ_QUEUE_NAME: ${RABBITMQ_QUEUE_NAME} - POSTGRES_HOST: records_postgres + RABBITMQ_POLLING_TIMEOUT: ${RABBITMQ_POLLING_TIMEOUT} + POSTGRES_HOST: localhost POSTGRES_PORT: 5432 POSTGRES_USERNAME: ${POSTGRES_USERNAME} POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} @@ -95,6 +100,10 @@ services: CSV_PARSER_RECOGNIZED_DATETIME_FORMATS: ${CSV_PARSER_RECOGNIZED_DATETIME_FORMATS} CSV_PARSER_DELIMITER: 
${CSV_PARSER_DELIMITER} CSV_PARSER_FILE_EXTENSION: ${CSV_PARSER_FILE_EXTENSION} + network_mode: host + volumes: + - ./${TARGET_FILE_DIR}:/home/app/${TARGET_FILE_DIR}:ro + # - ./${CONSUMER_LOG_DIR}:/home/app/src/${CONSUMER_LOG_DIR} deploy: mode: replicated replicas: ${CONSUMER_REPLICAS} diff --git a/producer/.dockerignore b/producer/.dockerignore new file mode 100644 index 0000000..4a643f0 --- /dev/null +++ b/producer/.dockerignore @@ -0,0 +1,3 @@ +__pycache__ +.pytest_cache +.mypy_cache diff --git a/producer/dockerfile b/producer/dockerfile index 9f0a359..7cdf707 100644 --- a/producer/dockerfile +++ b/producer/dockerfile @@ -28,6 +28,8 @@ RUN yum install -y \ RUN adduser app WORKDIR /home/app +COPY src ./src/ + RUN chown -R app:app /home/app USER app @@ -36,5 +38,4 @@ WORKDIR ${HOME} COPY --from=build /home/app/.local /home/app/.local -COPY src ./src/ CMD python3.11 -m src.deployments.script.main diff --git a/producer/src/deployments/script/main.py b/producer/src/deployments/script/main.py index e66c393..10bea4e 100644 --- a/producer/src/deployments/script/main.py +++ b/producer/src/deployments/script/main.py @@ -27,7 +27,10 @@ def main() -> None: successes_map = {} try: + logging.info("Publishing filenames...") + for filename in traverse_files(): + logging.info(f"Publishing {filename}...") successes_map[filename] = publish_filenames_client.publish(filename) failed_filenames = [ @@ -40,6 +43,7 @@ def main() -> None: logging.exception(e) raise e finally: + logging.info("Closing publish filenames client...") publish_filenames_client.close() diff --git a/producer/src/deployments/script/setup_logging.py b/producer/src/deployments/script/setup_logging.py index dcae074..e41bc91 100644 --- a/producer/src/deployments/script/setup_logging.py +++ b/producer/src/deployments/script/setup_logging.py @@ -20,50 +20,50 @@ def setup_logging() -> None: stream_handler.setLevel(LoggingConfig.LOG_LEVEL) handlers.append(stream_handler) - if LOG_LEVEL_INT is not None and LOG_LEVEL_INT <= logging.INFO: - info_handler = TimedRotatingFileHandler( - filename=f"{LoggingConfig.LOG_DIR}/info.log", - when=LoggingConfig.LOG_ROTATION, - interval=1, - backupCount=LoggingConfig.LOG_RETENTION, - ) - info_handler.setFormatter( - logging.Formatter( - LoggingConfig.LOG_FORMAT, datefmt=LoggingConfig.LOG_DATE_FORMAT - ) + # if LOG_LEVEL_INT is not None and LOG_LEVEL_INT <= logging.INFO: + info_handler = TimedRotatingFileHandler( + filename=f"{LoggingConfig.LOG_DIR}/info.log", + when=LoggingConfig.LOG_ROTATION, + interval=1, + backupCount=LoggingConfig.LOG_RETENTION, + ) + info_handler.setFormatter( + logging.Formatter( + LoggingConfig.LOG_FORMAT, datefmt=LoggingConfig.LOG_DATE_FORMAT ) - info_handler.setLevel(logging.INFO) - handlers.append(info_handler) + ) + info_handler.setLevel(logging.INFO) + handlers.append(info_handler) - if LOG_LEVEL_INT is not None and LOG_LEVEL_INT <= logging.WARNING: - warning_handler = TimedRotatingFileHandler( - filename=f"{LoggingConfig.LOG_DIR}/warning.log", - when=LoggingConfig.LOG_ROTATION, - interval=1, - backupCount=LoggingConfig.LOG_RETENTION, - ) - warning_handler.setFormatter( - logging.Formatter( - LoggingConfig.LOG_FORMAT, datefmt=LoggingConfig.LOG_DATE_FORMAT - ) + # if LOG_LEVEL_INT is not None and LOG_LEVEL_INT <= logging.WARNING: + warning_handler = TimedRotatingFileHandler( + filename=f"{LoggingConfig.LOG_DIR}/warning.log", + when=LoggingConfig.LOG_ROTATION, + interval=1, + backupCount=LoggingConfig.LOG_RETENTION, + ) + warning_handler.setFormatter( + logging.Formatter( + 
LoggingConfig.LOG_FORMAT, datefmt=LoggingConfig.LOG_DATE_FORMAT ) - warning_handler.setLevel(logging.WARNING) - handlers.append(warning_handler) + ) + warning_handler.setLevel(logging.WARNING) + handlers.append(warning_handler) - if LOG_LEVEL_INT is not None and LOG_LEVEL_INT <= logging.ERROR: - error_handler = TimedRotatingFileHandler( - filename=f"{LoggingConfig.LOG_DIR}/error.log", - when=LoggingConfig.LOG_ROTATION, - interval=1, - backupCount=LoggingConfig.LOG_RETENTION, - ) - error_handler.setFormatter( - logging.Formatter( - LoggingConfig.LOG_FORMAT, datefmt=LoggingConfig.LOG_DATE_FORMAT - ) + # if LOG_LEVEL_INT is not None and LOG_LEVEL_INT <= logging.ERROR: + error_handler = TimedRotatingFileHandler( + filename=f"{LoggingConfig.LOG_DIR}/error.log", + when=LoggingConfig.LOG_ROTATION, + interval=1, + backupCount=LoggingConfig.LOG_RETENTION, + ) + error_handler.setFormatter( + logging.Formatter( + LoggingConfig.LOG_FORMAT, datefmt=LoggingConfig.LOG_DATE_FORMAT ) - error_handler.setLevel(logging.ERROR) - handlers.append(error_handler) + ) + error_handler.setLevel(logging.ERROR) + handlers.append(error_handler) root_logger = logging.getLogger() root_logger.setLevel(LoggingConfig.LOG_LEVEL) From dbe4a10c7e86593b321e342fc6cfbb2fd2fd4065 Mon Sep 17 00:00:00 2001 From: alexau Date: Sun, 3 Dec 2023 23:21:44 +0800 Subject: [PATCH 36/36] Updated the test and readme --- .env | 20 +- .github/workflows/test.yml | 7 +- .gitignore | 5 + Makefile | 17 +- README.md | 180 +++++++++++++++++- consumer/dockerfile | 34 +--- .../fetch_filenames_stream/rabbitmq.py | 3 + .../adapters/file_parse_iot_records/csv.py | 13 +- consumer/src/deployments/script/config.py | 4 +- consumer/src/deployments/script/main.py | 6 +- .../src/deployments/script/setup_logging.py | 81 ++++---- .../test_csv/conftest.py | 1 - .../test_csv/test_failed_parse.py | 2 + .../test_csv/utils.py | 66 +++---- .../test_main_read_file_resilience.py | 1 + .../test_main/test_main_successful.py | 4 + .../test_main_upsert_record_resilience.py | 2 + .../test_script/test_main/utils.py | 76 +++----- database/dockerfile | 1 - docker-compose.test.yml | 17 ++ docker-compose.yml | 39 ++-- producer/dockerfile | 33 +--- .../adapters/publish_filenames/rabbitmq.py | 3 + producer/src/deployments/script/config.py | 1 + producer/src/deployments/script/main.py | 1 + .../src/deployments/script/setup_logging.py | 15 +- test_generator.py | 39 ++-- 27 files changed, 403 insertions(+), 268 deletions(-) diff --git a/.env b/.env index dae9792..e4964b2 100644 --- a/.env +++ b/.env @@ -1,9 +1,12 @@ -POSTGRES_VERSION_TAG=15.3-alpine3.17 +TZ=Asia/Hong_Kong + +POSTGRES_VERSION_TAG=13 POSTGRES_PORT=5432 POSTGRES_USERNAME=postgres POSTGRES_PASSWORD=postgres POSTGRES_DATABASE=records -POSTGRES_BATCH_UPSERT_SIZE=1000 +POSTGRES_BATCH_UPSERT_SIZE=5000 +POSTGRES_VOLUME_DIR=./postgres-data RABBITMQ_VERSION_TAG=3.12.10-management RABBITMQ_USERNAME=rabbitmq @@ -11,30 +14,31 @@ RABBITMQ_PASSWORD=rabbitmq RABBITMQ_PORT=5672 RABBITMQ_WEBAPP_PORT=15672 RABBITMQ_POLLING_TIMEOUT=60 +RABBITMQ_SOCKET_TIMEOUT=86400 +RABBITMQ_VOLUME_DIR=./rabbitmq-data RABBITMQ_QUEUE_NAME=filenames AMAZON_LINUX_VERSION_TAG=2023.2.20231113.0 -TARGET_FILE_DIR=./records +TARGET_FILE_DIR=./records_test TARGET_FILE_EXTENSION=.csv PRODUCER_LOG_LEVEL=INFO -PRODUCER_LOG_FORMAT="[%(asctime)s | %(levelname)s | %(name)s] {%(filename)s:%(lineno)d} >> %(message)s" +PRODUCER_LOG_FORMAT="[%(asctime)s | %(levelname)s] {%(filename)s:%(lineno)d} >> %(message)s" PRODUCER_LOG_DATE_FORMAT="%Y-%m-%d %H:%M:%S" 
PRODUCER_LOG_DIR=./logs/producer PRODUCER_LOG_RETENTION=7 PRODUCER_LOG_ROTATION=midnight CONSUMER_LOG_LEVEL=INFO -CONSUMER_LOG_FORMAT="[%(asctime)s | %(levelname)s | %(name)s] {%(filename)s:%(lineno)d} >> %(message)s" +CONSUMER_LOG_FORMAT="[%(asctime)s | %(levelname)s] {%(filename)s:%(lineno)d} >> %(message)s" CONSUMER_LOG_DATE_FORMAT="%Y-%m-%d %H:%M:%S" -CONSUMER_LOG_DIR=./logs/producer +CONSUMER_LOG_DIR=./logs/consumer CONSUMER_LOG_RETENTION=7 CONSUMER_LOG_ROTATION=midnight -CONSUMER_REPLICAS=2 +CONSUMER_REPLICAS=16 -CSV_PARSER_RECOGNIZED_DATETIME_FORMATS="%Y-%m-%dT%H:%M:%S%z" CSV_PARSER_DELIMITER="," CSV_PARSER_FILE_EXTENSION=.csv diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1b81d3d..06c8fee 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -19,7 +19,7 @@ jobs: rabbitmq-username: ${{ steps.load-dotenv.outputs.RABBITMQ_USERNAME }} rabbitmq-password: ${{ steps.load-dotenv.outputs.RABBITMQ_PASSWORD }} rabbitmq-queue-name: ${{ steps.load-dotenv.outputs.RABBITMQ_QUEUE_NAME }} - csv-parser-recognized-datetime-formats: ${{ steps.load-dotenv.outputs.CSV_PARSER_RECOGNIZED_DATETIME_FORMATS }} + rabbitmq-socket-timeout: ${{ steps.load-dotenv.outputs.RABBITMQ_SOCKET_TIMEOUT }} csv-parser-delimiter: ${{ steps.load-dotenv.outputs.CSV_PARSER_DELIMITER }} csv-parser-file-extension: ${{ steps.load-dotenv.outputs.CSV_PARSER_FILE_EXTENSION }} steps: @@ -43,7 +43,7 @@ jobs: echo "RABBITMQ_USERNAME=$RABBITMQ_USERNAME" >> $GITHUB_OUTPUT echo "RABBITMQ_PASSWORD=$RABBITMQ_PASSWORD" >> $GITHUB_OUTPUT echo "RABBITMQ_QUEUE_NAME=$RABBITMQ_QUEUE_NAME" >> $GITHUB_OUTPUT - echo "CSV_PARSER_RECOGNIZED_DATETIME_FORMATS=$CSV_PARSER_RECOGNIZED_DATETIME_FORMATS" >> $GITHUB_OUTPUT + echo "RABBITMQ_SOCKET_TIMEOUT=$RABBITMQ_SOCKET_TIMEOUT" >> $GITHUB_OUTPUT echo "CSV_PARSER_DELIMITER=$CSV_PARSER_DELIMITER" >> $GITHUB_OUTPUT echo "CSV_PARSER_FILE_EXTENSION=$CSV_PARSER_FILE_EXTENSION" >> $GITHUB_OUTPUT test-producer: @@ -101,6 +101,7 @@ jobs: RABBITMQ_USERNAME: ${{ needs.load-dotenv.outputs.rabbitmq-username }} RABBITMQ_PASSWORD: ${{ needs.load-dotenv.outputs.rabbitmq-password }} RABBITMQ_QUEUE_NAME: ${{ needs.load-dotenv.outputs.rabbitmq-queue-name }} + RABBITMQ_SOCKET_TIMEOUT: ${{ needs.load-dotenv.outputs.rabbitmq-socket-timeout }} TARGET_FILE_DIR: ${{ needs.load-dotenv.outputs.target-file-dir }} TARGET_FILE_EXTENSION: ${{ needs.load-dotenv.outputs.target-file-extension }} - name: Output coverage file @@ -181,7 +182,7 @@ jobs: RABBITMQ_USERNAME: ${{ needs.load-dotenv.outputs.rabbitmq-username }} RABBITMQ_PASSWORD: ${{ needs.load-dotenv.outputs.rabbitmq-password }} RABBITMQ_QUEUE_NAME: ${{ needs.load-dotenv.outputs.rabbitmq-queue-name }} - CSV_PARSER_RECOGNIZED_DATETIME_FORMATS: ${{ needs.load-dotenv.outputs.csv-parser-recognized-datetime-formats }} + RABBITMQ_SOCKET_TIMEOUT: ${{ needs.load-dotenv.outputs.rabbitmq-socket-timeout }} CSV_PARSER_DELIMITER: ${{ needs.load-dotenv.outputs.csv-parser-delimiter }} CSV_PARSER_FILE_EXTENSION: ${{ needs.load-dotenv.outputs.csv-parser-file-extension }} - name: Output coverage file diff --git a/.gitignore b/.gitignore index f8663b2..8f28c95 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,8 @@ __pycache__ .mypy_cache records +records_test +logs +postgres-data +postgres-logs +rabbitmq-data diff --git a/Makefile b/Makefile index dbc154d..d3c8970 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ build: docker compose pull --ignore-buildable docker compose build up: - docker compose up -d --wait && docker compose logs -f --tail 
100 records_producer records_consumer + docker compose up -d && docker compose logs -f --tail 100 records_producer records_consumer logs: docker compose logs -f --tail 100 records_producer records_consumer down: @@ -16,6 +16,8 @@ stats: docker stats --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.MemPerc}}\t{{.NetIO}}\t{{.BlockIO}}\t{{.PIDs}}" setup_test_env: docker compose -f docker-compose.test.yml up -d +teardown_test_env: + docker compose -f docker-compose.test.yml down test_producer: export POSTGRES_HOST=$(POSTGRES_HOST) && \ export POSTGRES_PORT=$(POSTGRES_PORT) && \ @@ -27,7 +29,7 @@ test_producer: export RABBITMQ_USERNAME=$(RABBITMQ_USERNAME) && \ export RABBITMQ_PASSWORD=$(RABBITMQ_PASSWORD) && \ export QUEUE_NAME=$(QUEUE_NAME) && \ - COVERAGE_FILE=.coverage_producer coverage run -m pytest -vx producer/tests + COVERAGE_FILE=.coverage_producer coverage run -m pytest -vx --last-failed producer/tests test_consumer: export POSTGRES_HOST=$(POSTGRES_HOST) && \ export POSTGRES_PORT=$(POSTGRES_PORT) && \ @@ -47,7 +49,7 @@ coverage_report: coverage report -m --omit="*/tests/*" test: test_producer test_consumer coverage_report -generate_csv: +generate_csv_demo: python test_generator.py \ --num-sensors $(GEN_NUM_SENSORS) \ --num-records $(GEN_NUM_RECORDS) \ @@ -55,3 +57,12 @@ generate_csv: --start-date $(GEN_START_DATE) \ --timezone $(GEN_TIMEZONE) \ --dir $(TARGET_FILE_DIR) + +generate_csv_end_to_end_test: + python test_generator.py \ + --num-sensors 10 \ + --num-records 5 \ + --record-interval 1 \ + --start-date 2021-01-01 \ + --timezone Asia/Shanghai \ + --dir records_test diff --git a/README.md b/README.md index 6b7a7c8..f31345f 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# producer_consumer_csv +# Producer Consumer CSV ![Build Status](https://github.com/github/docs/actions/workflows/test.yml/badge.svg) ![Code Coverage](./coverage.svg) @@ -6,16 +6,64 @@ ## Description This is a simple producer consumer application that reads a csv file and writes the data to a database. The application is written in Python. +## Architecture +The application is composed of the following components: +- Producer (Reads the csv file and sends the data to the message queue) +- Consumer (Reads the data from the message queue and writes the data to the database) +- Persistent Message Queue (RabbitMQ) +- Database (Postgres) + +All the components run in Docker containers. The producer and consumer run in separate containers, scaled with Docker Compose, to simulate a real-world scenario where they are deployed on different servers. + +For performance, you can scale the number of consumer containers by changing the `CONSUMER_REPLICAS` variable in the .env file. The default value is 16. + +### Requirements +- Python 3.11 +- Docker +- Make + +### Database Schema +The initialization script is located in the `database/assets` folder. 
The script will create the following tables: + +#### records +|column|type|description| +|------|----|-----------| +|record_time|timestamp with timezone|The time when the record was generated| +|sensor_id|text|The id of the sensor| +|value|double precision|The value of the sensor| + +### Database Indexes +|index|columns| +|-----|-------| +|PK|sensor_id, record_time| +|BRIN|record_time| +|HASH|sensor_id| + +BRIN index is used for the `record_time` column as the data is generated in a sequential manner (time-series data). The HASH index is used for the `sensor_id` column as the data is usually queried by equality operation, but not range operation. The HASH index is more efficient than the BTREE index for equality operation. + +### Queue +The queue is implemented using RabbitMQ. The queue is declared as durable and the messages are published as persistent, so messages survive a broker restart. + +However, due to the complexity of the application, this project prefers the `get` operation over the `consume` operation, which simulates a short-polling queue. + +## Test Data +The test data is generated using the `generate_csv_demo` command. The command will generate a csv file with the following columns: +- `record_time`: The time when the record was generated +- `sensor_id`: The id of the sensor +- `value`: The value of the sensor + +You can check the section [Running the application](#running-the-application) for more details on how to generate the csv file. + ## Installation 1. Clone the repository 2. Install make and docker - For Ubuntu and Debian ```bash -sudo apt install make +$ sudo apt install make ``` - For Fedora and CentOS ```bash -sudo yum install make +$ sudo yum install make ``` For Docker installation, please refer to the [official documentation](https://docs.docker.com/engine/install/) @@ -35,17 +83,133 @@ Please make sure you don't change the name of the variables as they are used in ### Running the application -1. Run the following command to build the docker image +1. First run the `generate_csv_demo` command to generate a csv file. You can change the following parameters in the .env: +- `GEN_NUM_SENSORS` +- `GEN_NUM_RECORDS` +- `GEN_START_DATE` +- `GEN_RECORD_INTERVAL` +- `GEN_TIMEZONE` + +```bash +$ make generate_csv_demo +``` + +2. Run the following command to build the docker image ```bash $ make build ``` -2. Run the following command to start the docker compose stack + +3. Run the following command to start the docker compose stack. This will start the postgres database, rabbitmq, producer and different consumer containers. You can change the following parameters in the .env: +- `CONSUMER_REPLICAS` ```bash -$ make up / make up_d +$ make up ``` -The `up_d` command will run the container in detached mode. -3. Run the following command to stop the docker compose stack +4. Run the following command to stop the docker compose stack ```bash $ make down ``` + +### End-to-end test +While the unit tests are run as part of the build process, you can run the end-to-end test with the following steps: + +1. Run the make command `generate_csv_end_to_end_test`. It will generate data for 10 sensors, with 5 records each. The data will be generated in the `records_test` folder. +```bash +$ make generate_csv_end_to_end_test +``` + +2. Run the following command to build the docker image +```bash +$ make build +``` + +3. Run the following command to start the docker compose stack. 
This will start the postgres database, rabbitmq, producer and different consumer containers. You can change the following parameters in the .env: +- `CONSUMER_REPLICAS` + +```bash +$ make up +``` + +4. Query the database to check if the data has been written to the database and check the record with the following data from sql: +```sql +SELECT * + FROM records + ORDER BY sensor_id ASC, record_time ASC +``` + +|record_time|sensor_id|value| +|-----------|---------|-----| +|2021-01-01 00:00:00.000 +0800|17fc695a_4|0.9100387476052705| +|2021-01-01 00:00:01.000 +0800|17fc695a_4|0.9470819312177097| +|2021-01-01 00:00:02.000 +0800|17fc695a_4|0.9646317173285254| +|2021-01-01 00:00:03.000 +0800|17fc695a_4|0.5588283283219546| +|2021-01-01 00:00:04.000 +0800|17fc695a_4|0.10032294940781161| +|2021-01-01 00:00:00.000 +0800|23b8c1e9_1|0.17833466762332717| +|2021-01-01 00:00:01.000 +0800|23b8c1e9_1|0.5828395773770179| +|2021-01-01 00:00:02.000 +0800|23b8c1e9_1|0.6709222475097419| +|2021-01-01 00:00:03.000 +0800|23b8c1e9_1|0.08392094150600504| +|2021-01-01 00:00:04.000 +0800|23b8c1e9_1|0.519270757199653| +|2021-01-01 00:00:00.000 +0800|47378190_8|0.8730491223149253| +|2021-01-01 00:00:01.000 +0800|47378190_8|0.9269235181749119| +|2021-01-01 00:00:02.000 +0800|47378190_8|0.7912797041193453| +|2021-01-01 00:00:03.000 +0800|47378190_8|0.7901636441724763| +|2021-01-01 00:00:04.000 +0800|47378190_8|0.7886736978911509| +|2021-01-01 00:00:00.000 +0800|6b65a6a4_7|0.10293554590959142| +|2021-01-01 00:00:01.000 +0800|6b65a6a4_7|0.2888706613682428| +|2021-01-01 00:00:02.000 +0800|6b65a6a4_7|0.4279942939571587| +|2021-01-01 00:00:03.000 +0800|6b65a6a4_7|0.23512685053378612| +|2021-01-01 00:00:04.000 +0800|6b65a6a4_7|0.5272935984703412| +|2021-01-01 00:00:00.000 +0800|972a8469_3|0.7642357069109641| +|2021-01-01 00:00:01.000 +0800|972a8469_3|0.5701299072914774| +|2021-01-01 00:00:02.000 +0800|972a8469_3|0.17473379247794074| +|2021-01-01 00:00:03.000 +0800|972a8469_3|0.12464021515158785| +|2021-01-01 00:00:04.000 +0800|972a8469_3|0.5390567336729636| +|2021-01-01 00:00:00.000 +0800|9a1de644_5|0.3758090093767995| +|2021-01-01 00:00:01.000 +0800|9a1de644_5|0.33553000407688316| +|2021-01-01 00:00:02.000 +0800|9a1de644_5|0.9667728274172214| +|2021-01-01 00:00:03.000 +0800|9a1de644_5|0.9549845776369301| +|2021-01-01 00:00:04.000 +0800|9a1de644_5|0.7740952070735415| +|2021-01-01 00:00:00.000 +0800|b74d0fb1_6|0.3213794858378719| +|2021-01-01 00:00:01.000 +0800|b74d0fb1_6|0.5947556423536645| +|2021-01-01 00:00:02.000 +0800|b74d0fb1_6|0.8872919823927438| +|2021-01-01 00:00:03.000 +0800|b74d0fb1_6|0.28297514015876457| +|2021-01-01 00:00:04.000 +0800|b74d0fb1_6|0.6590113969392454| +|2021-01-01 00:00:00.000 +0800|bd9c66b3_2|0.36466072100083013| +|2021-01-01 00:00:01.000 +0800|bd9c66b3_2|0.8408935901254108| +|2021-01-01 00:00:02.000 +0800|bd9c66b3_2|0.8945802964470245| +|2021-01-01 00:00:03.000 +0800|bd9c66b3_2|0.027150264273096747| +|2021-01-01 00:00:04.000 +0800|bd9c66b3_2|0.9236042897439161| +|2021-01-01 00:00:00.000 +0800|bdd640fb_0|0.0746765216767864| +|2021-01-01 00:00:01.000 +0800|bdd640fb_0|0.8404332126798344| +|2021-01-01 00:00:02.000 +0800|bdd640fb_0|0.31870553433981874| +|2021-01-01 00:00:03.000 +0800|bdd640fb_0|0.825033074919654| +|2021-01-01 00:00:04.000 +0800|bdd640fb_0|0.7161990766355211| +|2021-01-01 00:00:00.000 +0800|c241330b_9|0.6940489142492581| +|2021-01-01 00:00:01.000 +0800|c241330b_9|0.7748088833830469| +|2021-01-01 00:00:02.000 +0800|c241330b_9|0.85280342321841| +|2021-01-01 00:00:03.000 
+0800|c241330b_9|0.32443698906841056| +|2021-01-01 00:00:04.000 +0800|c241330b_9|0.4457555011219805| + + +## Unit tests +The unit tests are run as part of the CI pipeline. You can run the unit tests locally by running the following steps: + +1. Run the make `setup_test_env` command to set up the test environment +```bash +$ make setup_test_env +``` + +2. Install the following pip packages: +```bash +$ pip install -r producer/requirements-dev.txt +$ pip install -r consumer/requirements-dev.txt +``` + +3. Run the following command to run the unit tests +```bash +$ make test +``` + +The unit test will run both the producer and consumer unit tests. The coverage report will be generated in the `.coverage` file. diff --git a/consumer/dockerfile b/consumer/dockerfile index 0642ced..1721b01 100644 --- a/consumer/dockerfile +++ b/consumer/dockerfile @@ -1,40 +1,16 @@ ARG AMAZON_LINUX_VERSION_TAG -FROM amazonlinux:${AMAZON_LINUX_VERSION_TAG} as build +FROM amazonlinux:${AMAZON_LINUX_VERSION_TAG} RUN yum install -y \ python3.11 \ - python3.11-pip \ python3.11-devel \ - shadow-utils + python3.11-pip -RUN adduser app -ENV HOME=/home/app -WORKDIR ${HOME} - -RUN chown -R app:app /home/app - -USER app +WORKDIR /app COPY requirements.txt . -RUN python3.11 -m pip install --user --no-warn-script-location -r requirements.txt - -FROM amazonlinux:2023.2.20231026.0 as runtime - -RUN yum install -y \ - python3.11 \ - python3.11-pip \ - shadow-utils - -RUN adduser app -WORKDIR /home/app - -RUN chown -R app:app /home/app - -USER app -ENV HOME=/home/app -WORKDIR ${HOME} - -COPY --from=build /home/app/.local /home/app/.local +RUN python3.11 -m pip install -r requirements.txt COPY src ./src/ + CMD python3.11 -m src.deployments.script.main diff --git a/consumer/src/adapters/fetch_filenames_stream/rabbitmq.py b/consumer/src/adapters/fetch_filenames_stream/rabbitmq.py index bb27e75..0a1e5a0 100644 --- a/consumer/src/adapters/fetch_filenames_stream/rabbitmq.py +++ b/consumer/src/adapters/fetch_filenames_stream/rabbitmq.py @@ -21,6 +21,7 @@ def __init__( credentials_service: Callable[[], tuple[str, str]], queue: str = "filenames", polling_timeout: int = 10, + socket_timeout: int = 86400, ) -> None: self._host = host self._port = port @@ -30,6 +31,7 @@ def __init__( self._channel: Optional[BlockingChannel] = None self._polling_timeout = polling_timeout self._last_poll_time: Optional[datetime] = None + self._socket_timeout = socket_timeout @overload def ack(self, message_receipt: int) -> bool: @@ -104,6 +106,7 @@ def _get_amqp_conn(self) -> Iterator[Connection]: host=self._host, port=self._port, credentials=credentials, + socket_timeout=self._socket_timeout, ) self._conn = pika.BlockingConnection(conn_parameters) yield self._conn diff --git a/consumer/src/adapters/file_parse_iot_records/csv.py b/consumer/src/adapters/file_parse_iot_records/csv.py index a09c10b..a7a204a 100644 --- a/consumer/src/adapters/file_parse_iot_records/csv.py +++ b/consumer/src/adapters/file_parse_iot_records/csv.py @@ -14,12 +14,10 @@ class CSVParseIOTRecordsClient(FileParseIOTRecordsClient): def __init__( self, - recognized_datetime_formats: Sequence[str], delimiter: str = ",", file_extension: str = ".csv", ) -> None: self._delimiter = delimiter - self._recognized_datetime_formats = recognized_datetime_formats self._file_extension = file_extension @overload @@ -63,12 +61,10 @@ def parse_stream(self, filename: str) -> Iterator[IOTRecord]: logging.exception(e) def _parse_datetime(self, datetime_str: str) -> Optional[datetime]: - for datetime_format 
in self._recognized_datetime_formats: - try: - return datetime.strptime(datetime_str, datetime_format) - except ValueError: - pass - return None + try: + return datetime.fromisoformat(datetime_str) + except ValueError: + return None def _parse_value(self, value_str: str) -> Optional[Decimal]: try: @@ -102,6 +98,7 @@ def _parse_single(self, filename: str) -> Optional[list[IOTRecord]]: self._basic_file_check(filename) with open(filename) as csvfile: reader = csv.reader(csvfile, delimiter=self._delimiter) + next(reader) # skip header return list(self._parse_iter(reader)) except Exception as e: logging.exception(e) diff --git a/consumer/src/deployments/script/config.py b/consumer/src/deployments/script/config.py index 0ed6ae1..93840c3 100644 --- a/consumer/src/deployments/script/config.py +++ b/consumer/src/deployments/script/config.py @@ -19,6 +19,7 @@ class RabbitMQConfig: PASSWORD = os.getenv("RABBITMQ_PASSWORD", "guest") QUEUE = os.getenv("RABBITMQ_QUEUE_NAME", "filenames") POLLING_TIMEOUT = int(os.getenv("RABBITMQ_POLLING_TIMEOUT", 10)) + SOCKET_TIMEOUT = int(os.getenv("RABBITMQ_SOCKET_TIMEOUT", 86400)) class PostgresConfig: @@ -31,8 +32,5 @@ class PostgresConfig: class CSVParserConfig: - RECOGNIZED_DATETIME_FORMATS = os.getenv( - "CSV_PARSER_RECOGNIZED_DATETIME_FORMATS", "" - ).split(",") DELIMITER = os.getenv("CSV_PARSER_DELIMITER", ",") FILE_EXTENSION = os.getenv("CSV_PARSER_FILE_EXTENSION", ".csv") diff --git a/consumer/src/deployments/script/main.py b/consumer/src/deployments/script/main.py index 57cd029..a80ee6c 100644 --- a/consumer/src/deployments/script/main.py +++ b/consumer/src/deployments/script/main.py @@ -30,10 +30,10 @@ def main() -> None: credentials_service=lambda: (RabbitMQConfig.USERNAME, RabbitMQConfig.PASSWORD), queue=RabbitMQConfig.QUEUE, polling_timeout=RabbitMQConfig.POLLING_TIMEOUT, + socket_timeout=RabbitMQConfig.SOCKET_TIMEOUT, ) file_parse_iot_records_client = CSVParseIOTRecordsClient( - recognized_datetime_formats=CSVParserConfig.RECOGNIZED_DATETIME_FORMATS, delimiter=CSVParserConfig.DELIMITER, file_extension=CSVParserConfig.FILE_EXTENSION, ) @@ -47,6 +47,8 @@ def main() -> None: ) try: + logging.info("Starting to fetch filenames...") + for filename, receipt in fetch_filenames_stream_client.fetch_stream(): logging.info(f"Upserting {filename}...") iot_records_buffer: list[IOTRecord] = [] @@ -57,6 +59,8 @@ def main() -> None: if len(iot_records_buffer) < PostgresConfig.BATCH_UPSERT_SIZE: continue + logging.info(f"Upserting {len(iot_records_buffer)} records...") + _upsert_iot_records_buffer( iot_records_buffer, upsert_iot_records_client ) diff --git a/consumer/src/deployments/script/setup_logging.py b/consumer/src/deployments/script/setup_logging.py index dcae074..639d52b 100644 --- a/consumer/src/deployments/script/setup_logging.py +++ b/consumer/src/deployments/script/setup_logging.py @@ -5,9 +5,11 @@ def setup_logging() -> None: - LOG_LEVEL_INT = getattr(logging, LoggingConfig.LOG_LEVEL.upper(), None) + pathlib.Path(LoggingConfig.LOG_DIR).absolute().mkdir(parents=True, exist_ok=True) - pathlib.Path(LoggingConfig.LOG_DIR).mkdir(parents=True, exist_ok=True) + (pathlib.Path(LoggingConfig.LOG_DIR).absolute() / "info.log").touch() + (pathlib.Path(LoggingConfig.LOG_DIR).absolute() / "warning.log").touch() + (pathlib.Path(LoggingConfig.LOG_DIR).absolute() / "error.log").touch() handlers: list[logging.Handler] = [] @@ -20,50 +22,47 @@ def setup_logging() -> None: stream_handler.setLevel(LoggingConfig.LOG_LEVEL) handlers.append(stream_handler) - if LOG_LEVEL_INT is not 
None and LOG_LEVEL_INT <= logging.INFO: - info_handler = TimedRotatingFileHandler( - filename=f"{LoggingConfig.LOG_DIR}/info.log", - when=LoggingConfig.LOG_ROTATION, - interval=1, - backupCount=LoggingConfig.LOG_RETENTION, - ) - info_handler.setFormatter( - logging.Formatter( - LoggingConfig.LOG_FORMAT, datefmt=LoggingConfig.LOG_DATE_FORMAT - ) + info_handler = TimedRotatingFileHandler( + filename=f"{LoggingConfig.LOG_DIR}/info.log", + when=LoggingConfig.LOG_ROTATION, + interval=1, + backupCount=LoggingConfig.LOG_RETENTION, + ) + info_handler.setFormatter( + logging.Formatter( + LoggingConfig.LOG_FORMAT, datefmt=LoggingConfig.LOG_DATE_FORMAT ) - info_handler.setLevel(logging.INFO) - handlers.append(info_handler) + ) + info_handler.setLevel(logging.INFO) + handlers.append(info_handler) - if LOG_LEVEL_INT is not None and LOG_LEVEL_INT <= logging.WARNING: - warning_handler = TimedRotatingFileHandler( - filename=f"{LoggingConfig.LOG_DIR}/warning.log", - when=LoggingConfig.LOG_ROTATION, - interval=1, - backupCount=LoggingConfig.LOG_RETENTION, - ) - warning_handler.setFormatter( - logging.Formatter( - LoggingConfig.LOG_FORMAT, datefmt=LoggingConfig.LOG_DATE_FORMAT - ) + warning_handler = TimedRotatingFileHandler( + filename=f"{LoggingConfig.LOG_DIR}/warning.log", + when=LoggingConfig.LOG_ROTATION, + interval=1, + backupCount=LoggingConfig.LOG_RETENTION, + ) + warning_handler.setFormatter( + logging.Formatter( + LoggingConfig.LOG_FORMAT, datefmt=LoggingConfig.LOG_DATE_FORMAT ) - warning_handler.setLevel(logging.WARNING) - handlers.append(warning_handler) + ) + warning_handler.setLevel(logging.WARNING) + handlers.append(warning_handler) - if LOG_LEVEL_INT is not None and LOG_LEVEL_INT <= logging.ERROR: - error_handler = TimedRotatingFileHandler( - filename=f"{LoggingConfig.LOG_DIR}/error.log", - when=LoggingConfig.LOG_ROTATION, - interval=1, - backupCount=LoggingConfig.LOG_RETENTION, - ) - error_handler.setFormatter( - logging.Formatter( - LoggingConfig.LOG_FORMAT, datefmt=LoggingConfig.LOG_DATE_FORMAT - ) + error_handler = TimedRotatingFileHandler( + filename=f"{LoggingConfig.LOG_DIR}/error.log", + when=LoggingConfig.LOG_ROTATION, + interval=1, + backupCount=LoggingConfig.LOG_RETENTION, + ) + error_handler.setFormatter( + logging.Formatter( + LoggingConfig.LOG_FORMAT, datefmt=LoggingConfig.LOG_DATE_FORMAT ) - error_handler.setLevel(logging.ERROR) - handlers.append(error_handler) + ) + error_handler.setLevel(logging.ERROR) + handlers.append(error_handler) root_logger = logging.getLogger() root_logger.setLevel(LoggingConfig.LOG_LEVEL) diff --git a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/conftest.py b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/conftest.py index c964d6c..0a61683 100644 --- a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/conftest.py +++ b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/conftest.py @@ -82,7 +82,6 @@ def random_invalid_value_ndjson_file(setup_tempdir: Path) -> Path: @pytest.fixture(scope="function") def csv_parse_iot_records_client() -> CSVParseIOTRecordsClient: return CSVParseIOTRecordsClient( - recognized_datetime_formats=CSVParserConfig.RECOGNIZED_DATETIME_FORMATS, delimiter=CSVParserConfig.DELIMITER, file_extension=CSVParserConfig.FILE_EXTENSION, ) diff --git a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_parse.py b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_parse.py index a926c33..e6c284a 100644 --- 
a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_parse.py +++ b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/test_failed_parse.py @@ -41,6 +41,8 @@ def test_parse_single_datetime_failed_ignore_row( iot_records = csv_parse_iot_records_client.parse( random_invalid_datetime_csv_file ) + + print(iot_records) assert len(iot_records) == 0 assert "Unrecognized datetime format:" in caplog.text assert "Unrecognized value format:" not in caplog.text diff --git a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/utils.py b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/utils.py index aaec3f4..cc86435 100644 --- a/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/utils.py +++ b/consumer/tests/test_adapters/test_file_parse_iot_records/test_csv/utils.py @@ -22,35 +22,27 @@ def random_valid_format_rows() -> list[tuple[str, ...]]: random_datetime = datetime.now(tz=ZoneInfo(random_timezone)) - random_time_delta random_sensor_id = "".join(random.choices(string.ascii_letters, k=10)) random_value = Decimal(random.random() * 100) - rows.append((random_datetime.isoformat(), random_sensor_id, str(random_value))) + rows.append( + { + "record_time": random_datetime.isoformat(timespec="milliseconds"), + "sensor_id": random_sensor_id, + "value": str(random_value), + } + ) return rows def random_invalid_datetime_rows() -> list[tuple[str, ...]]: rows = [] - all_datetime_formats = [ - "%Y-%m-%dT%H:%M:%S%z", - "%Y-%m-%dT%H:%M%z", - "%Y-%m-%d %H:%M:%S%z", - "%Y-%m-%d %H:%M%z", - ] for _ in range(10): - random_timezone = random.choice(list(zoneinfo.available_timezones())) - random_time_delta = timedelta( - hours=random.randint(0, 24), - minutes=random.randint(0, 60), - seconds=random.randint(0, 60), - ) - random_datetime = datetime.now(tz=ZoneInfo(random_timezone)) - random_time_delta random_sensor_id = "".join(random.choices(string.ascii_letters, k=10)) random_value = Decimal(random.random() * 100) - random_datetime_format = random.choice(all_datetime_formats) rows.append( - ( - random_datetime.strftime(random_datetime_format), - random_sensor_id, - str(random_value), - ) + { + "record_time": "".join(random.choices(string.ascii_letters, k=10)), + "sensor_id": random_sensor_id, + "value": str(random_value), + } ) return rows @@ -68,40 +60,26 @@ def random_invalid_value_rows() -> list[tuple[str, ...]]: random_sensor_id = "".join(random.choices(string.ascii_letters, k=10)) random_value = "".join(random.choices(string.ascii_letters, k=10)) rows.append( - ( - random_datetime.isoformat(), - random_sensor_id, - random_value, - ) + { + "record_time": random_datetime.isoformat(timespec="milliseconds"), + "sensor_id": random_sensor_id, + "value": random_value, + } ) return rows def random_invalid_datetime_and_value_rows() -> list[tuple[str, ...]]: rows = [] - all_datetime_formats = [ - "%Y-%m-%dT%H:%M:%S.%f%z", - "%Y-%m-%dT%H:%M%z", - "%Y-%m-%d %H:%M:%S%z", - "%Y-%m-%d %H:%M%z", - ] for _ in range(10): - random_timezone = random.choice(list(zoneinfo.available_timezones())) - random_time_delta = timedelta( - hours=random.randint(0, 24), - minutes=random.randint(0, 60), - seconds=random.randint(0, 60), - ) - random_datetime = datetime.now(tz=ZoneInfo(random_timezone)) - random_time_delta random_sensor_id = "".join(random.choices(string.ascii_letters, k=10)) random_value = "".join(random.choices(string.ascii_letters, k=10)) - random_datetime_format = random.choice(all_datetime_formats) rows.append( - ( - 
random_datetime.strftime(random_datetime_format), - random_sensor_id, - str(random_value), - ) + { + "record_time": "".join(random.choices(string.ascii_letters, k=10)), + "sensor_id": random_sensor_id, + "value": random_value, + } ) return rows diff --git a/consumer/tests/test_deployments/test_script/test_main/test_main_read_file_resilience.py b/consumer/tests/test_deployments/test_script/test_main/test_main_read_file_resilience.py index 6883813..5c20651 100644 --- a/consumer/tests/test_deployments/test_script/test_main/test_main_read_file_resilience.py +++ b/consumer/tests/test_deployments/test_script/test_main/test_main_read_file_resilience.py @@ -55,6 +55,7 @@ def mock_open(*args, **kwargs): with open(random_csv_file, "r") as f: reader = csv.reader(f) + next(reader) for row in reader: record_time, sensor_id, value = row diff --git a/consumer/tests/test_deployments/test_script/test_main/test_main_successful.py b/consumer/tests/test_deployments/test_script/test_main/test_main_successful.py index e140c9b..6c5f6fa 100644 --- a/consumer/tests/test_deployments/test_script/test_main/test_main_successful.py +++ b/consumer/tests/test_deployments/test_script/test_main/test_main_successful.py @@ -42,6 +42,7 @@ def test_main_flow_single_no_failed_files( with open(random_csv_file, "r") as f: reader = csv.reader(f) + next(reader) for row in reader: record_time, sensor_id, value = row @@ -111,6 +112,7 @@ def test_main_flow_batch_no_failed_files( for random_csv_file in random_csv_files: with open(random_csv_file, "r") as f: reader = csv.reader(f) + next(reader) for row in reader: record_time, sensor_id, value = row @@ -178,6 +180,7 @@ def test_main_flow_single_in_batch_no_failed_files( with open(random_csv_file, "r") as f: reader = csv.reader(f) + next(reader) for row in reader: record_time, sensor_id, value = row @@ -247,6 +250,7 @@ def test_main_flow_batch_in_batch_no_failed_files( for random_csv_file in random_csv_files: with open(random_csv_file, "r") as f: reader = csv.reader(f) + next(reader) for row in reader: record_time, sensor_id, value = row diff --git a/consumer/tests/test_deployments/test_script/test_main/test_main_upsert_record_resilience.py b/consumer/tests/test_deployments/test_script/test_main/test_main_upsert_record_resilience.py index 5fab317..20bd46d 100644 --- a/consumer/tests/test_deployments/test_script/test_main/test_main_upsert_record_resilience.py +++ b/consumer/tests/test_deployments/test_script/test_main/test_main_upsert_record_resilience.py @@ -58,6 +58,7 @@ def mock_upsert(*args, **kwargs): with open(random_csv_file, "r") as f: reader = csv.reader(f) + next(reader) for row in reader: record_time, sensor_id, value = row @@ -136,6 +137,7 @@ def mock_upsert(self, records) -> list[bool]: with open(random_csv_file, "r") as f: reader = csv.reader(f) + next(reader) for row in reader: record_time, sensor_id, value = row diff --git a/consumer/tests/test_deployments/test_script/test_main/utils.py b/consumer/tests/test_deployments/test_script/test_main/utils.py index 56fce36..6609da2 100644 --- a/consumer/tests/test_deployments/test_script/test_main/utils.py +++ b/consumer/tests/test_deployments/test_script/test_main/utils.py @@ -22,35 +22,27 @@ def random_valid_format_rows() -> list[tuple[str, ...]]: random_datetime = datetime.now(tz=ZoneInfo(random_timezone)) - random_time_delta random_sensor_id = "".join(random.choices(string.ascii_letters, k=10)) random_value = Decimal(random.random() * 100) - rows.append((random_datetime.isoformat(), random_sensor_id, str(random_value))) + 
rows.append( + { + "record_time": random_datetime.isoformat(timespec="milliseconds"), + "sensor_id": random_sensor_id, + "value": str(random_value), + } + ) return rows def random_invalid_datetime_rows() -> list[tuple[str, ...]]: rows = [] - all_datetime_formats = [ - "%Y-%m-%dT%H:%M:%S%z", - "%Y-%m-%dT%H:%M%z", - "%Y-%m-%d %H:%M:%S%z", - "%Y-%m-%d %H:%M%z", - ] for _ in range(10): - random_timezone = random.choice(list(zoneinfo.available_timezones())) - random_time_delta = timedelta( - hours=random.randint(0, 24), - minutes=random.randint(0, 60), - seconds=random.randint(0, 60), - ) - random_datetime = datetime.now(tz=ZoneInfo(random_timezone)) - random_time_delta random_sensor_id = "".join(random.choices(string.ascii_letters, k=10)) random_value = Decimal(random.random() * 100) - random_datetime_format = random.choice(all_datetime_formats) rows.append( - ( - random_datetime.strftime(random_datetime_format), - random_sensor_id, - str(random_value), - ) + { + "record_time": "".join(random.choices(string.ascii_letters, k=10)), + "sensor_id": random_sensor_id, + "value": str(random_value), + } ) return rows @@ -68,40 +60,26 @@ def random_invalid_value_rows() -> list[tuple[str, ...]]: random_sensor_id = "".join(random.choices(string.ascii_letters, k=10)) random_value = "".join(random.choices(string.ascii_letters, k=10)) rows.append( - ( - random_datetime.isoformat(), - random_sensor_id, - random_value, - ) + { + "record_time": random_datetime.isoformat(timespec="milliseconds"), + "sensor_id": random_sensor_id, + "value": random_value, + } ) return rows def random_invalid_datetime_and_value_rows() -> list[tuple[str, ...]]: rows = [] - all_datetime_formats = [ - "%Y-%m-%dT%H:%M:%S%z", - "%Y-%m-%dT%H:%M%z", - "%Y-%m-%d %H:%M:%S%z", - "%Y-%m-%d %H:%M%z", - ] for _ in range(10): - random_timezone = random.choice(list(zoneinfo.available_timezones())) - random_time_delta = timedelta( - hours=random.randint(0, 24), - minutes=random.randint(0, 60), - seconds=random.randint(0, 60), - ) - random_datetime = datetime.now(tz=ZoneInfo(random_timezone)) - random_time_delta random_sensor_id = "".join(random.choices(string.ascii_letters, k=10)) random_value = "".join(random.choices(string.ascii_letters, k=10)) - random_datetime_format = random.choice(all_datetime_formats) rows.append( - ( - random_datetime.strftime(random_datetime_format), - random_sensor_id, - str(random_value), - ) + { + "record_time": "".join(random.choices(string.ascii_letters, k=10)), + "sensor_id": random_sensor_id, + "value": random_value, + } ) return rows @@ -110,7 +88,10 @@ def random_csv_file(base_dir: Path, rows: list[tuple[str, ...]]) -> str: filename = "".join(random.choices(string.ascii_letters, k=10)) + ".csv" filepath = base_dir.joinpath(filename) with open(filepath, "w") as csvfile: - writer = csv.writer(csvfile, delimiter=",") + writer = csv.DictWriter( + csvfile, delimiter=",", fieldnames=["record_time", "sensor_id", "value"] + ) + writer.writeheader() writer.writerows(rows) return str(filepath) @@ -119,7 +100,10 @@ def random_tsv_file(base_dir: Path, rows: list[tuple[str, ...]]) -> str: filename = "".join(random.choices(string.ascii_letters, k=10)) + ".tsv" filepath = base_dir.joinpath(filename) with open(filepath, "w") as csvfile: - writer = csv.writer(csvfile, delimiter="\t") + writer = csv.DictWriter( + csvfile, delimiter="\t", fieldnames=["record_time", "sensor_id", "value"] + ) + writer.writeheader() writer.writerows(rows) return str(filepath) diff --git a/database/dockerfile b/database/dockerfile index 
021ced9..6b528c8 100644 --- a/database/dockerfile +++ b/database/dockerfile @@ -1,4 +1,3 @@ ARG POSTGRES_VERSION_TAG FROM docker.io/postgres:${POSTGRES_VERSION_TAG} -USER postgres COPY ./assets/create_records_table.sql /docker-entrypoint-initdb.d/init.sql diff --git a/docker-compose.test.yml b/docker-compose.test.yml index 46275ec..0d6d511 100644 --- a/docker-compose.test.yml +++ b/docker-compose.test.yml @@ -12,16 +12,33 @@ services: POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} POSTGRES_USER: ${POSTGRES_USERNAME} POSTGRES_DB: ${POSTGRES_DATABASE} + TZ: ${TZ} ports: - ${POSTGRES_PORT}:5432 + volumes: + - ${POSTGRES_VOLUME_DIR}:/var/lib/postgresql/data restart: always + healthcheck: + test: pg_isready -U ${POSTGRES_USERNAME} + interval: 15s + timeout: 10s + retries: 5 + records_rabbitmq: image: rabbitmq:${RABBITMQ_VERSION_TAG} container_name: records_rabbitmq environment: RABBITMQ_DEFAULT_USER: ${RABBITMQ_USERNAME} RABBITMQ_DEFAULT_PASS: ${RABBITMQ_PASSWORD} + TZ: ${TZ} ports: - ${RABBITMQ_WEBAPP_PORT}:15672 - ${RABBITMQ_PORT}:5672 + volumes: + - ${RABBITMQ_VOLUME_DIR}:/var/lib/rabbitmq restart: always + healthcheck: + test: rabbitmq-diagnostics -q ping + interval: 30s + timeout: 30s + retries: 3 diff --git a/docker-compose.yml b/docker-compose.yml index 4ca485c..f6c2dfe 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,15 +12,17 @@ services: POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} POSTGRES_USER: ${POSTGRES_USERNAME} POSTGRES_DB: ${POSTGRES_DATABASE} + TZ: ${TZ} ports: - ${POSTGRES_PORT}:5432 + volumes: + - ${POSTGRES_VOLUME_DIR}:/var/lib/postgresql/data restart: always healthcheck: - test: ["CMD", "pg_isready", "-U", "${POSTGRES_USERNAME}"] + test: pg_isready -U ${POSTGRES_USERNAME} interval: 15s timeout: 10s retries: 5 - start_period: 10s records_rabbitmq: image: rabbitmq:${RABBITMQ_VERSION_TAG} @@ -28,16 +30,18 @@ services: environment: RABBITMQ_DEFAULT_USER: ${RABBITMQ_USERNAME} RABBITMQ_DEFAULT_PASS: ${RABBITMQ_PASSWORD} + TZ: ${TZ} ports: - ${RABBITMQ_WEBAPP_PORT}:15672 - ${RABBITMQ_PORT}:5672 + volumes: + - ${RABBITMQ_VOLUME_DIR}:/var/lib/rabbitmq restart: always healthcheck: - test: ["CMD", "rabbitmqctl", "status"] - interval: 15s - timeout: 10s - retries: 5 - start_period: 10s + test: rabbitmq-diagnostics -q ping + interval: 30s + timeout: 30s + retries: 3 records_producer: image: records_producer:latest @@ -56,15 +60,16 @@ services: LOG_DIR: ${PRODUCER_LOG_DIR} LOG_RETENTION: ${PRODUCER_LOG_RETENTION} LOG_ROTATION: ${PRODUCER_LOG_ROTATION} - RABBITMQ_HOST: localhost + RABBITMQ_HOST: records_rabbitmq RABBITMQ_PORT: 5672 RABBITMQ_USERNAME: ${RABBITMQ_USERNAME} RABBITMQ_PASSWORD: ${RABBITMQ_PASSWORD} RABBITMQ_QUEUE_NAME: ${RABBITMQ_QUEUE_NAME} - network_mode: host + RABBITMQ_SOCKET_TIMEOUT: ${RABBITMQ_SOCKET_TIMEOUT} + TZ: ${TZ} volumes: - - ./${TARGET_FILE_DIR}:/home/app/${TARGET_FILE_DIR}:ro - # - ./${PRODUCER_LOG_DIR}:/home/app/${PRODUCER_LOG_DIR} + - ${TARGET_FILE_DIR}:/app/${TARGET_FILE_DIR}:ro + - ${PRODUCER_LOG_DIR}:/app/${PRODUCER_LOG_DIR} depends_on: records_postgres: condition: service_healthy @@ -85,25 +90,25 @@ services: LOG_DIR: ${CONSUMER_LOG_DIR} LOG_RETENTION: ${CONSUMER_LOG_RETENTION} LOG_ROTATION: ${CONSUMER_LOG_ROTATION} - RABBITMQ_HOST: localhost + RABBITMQ_HOST: records_rabbitmq RABBITMQ_PORT: 5672 RABBITMQ_USERNAME: ${RABBITMQ_USERNAME} RABBITMQ_PASSWORD: ${RABBITMQ_PASSWORD} RABBITMQ_QUEUE_NAME: ${RABBITMQ_QUEUE_NAME} RABBITMQ_POLLING_TIMEOUT: ${RABBITMQ_POLLING_TIMEOUT} - POSTGRES_HOST: localhost + RABBITMQ_SOCKET_TIMEOUT: ${RABBITMQ_SOCKET_TIMEOUT} + 
POSTGRES_HOST: records_postgres POSTGRES_PORT: 5432 POSTGRES_USERNAME: ${POSTGRES_USERNAME} POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} POSTGRES_DATABASE: ${POSTGRES_DATABASE} POSTGRES_BATCH_UPSERT_SIZE: ${POSTGRES_BATCH_UPSERT_SIZE} - CSV_PARSER_RECOGNIZED_DATETIME_FORMATS: ${CSV_PARSER_RECOGNIZED_DATETIME_FORMATS} CSV_PARSER_DELIMITER: ${CSV_PARSER_DELIMITER} CSV_PARSER_FILE_EXTENSION: ${CSV_PARSER_FILE_EXTENSION} - network_mode: host + TZ: ${TZ} volumes: - - ./${TARGET_FILE_DIR}:/home/app/${TARGET_FILE_DIR}:ro - # - ./${CONSUMER_LOG_DIR}:/home/app/src/${CONSUMER_LOG_DIR} + - ${TARGET_FILE_DIR}:/app/${TARGET_FILE_DIR}:ro + - ${CONSUMER_LOG_DIR}:/app/${CONSUMER_LOG_DIR} deploy: mode: replicated replicas: ${CONSUMER_REPLICAS} diff --git a/producer/dockerfile b/producer/dockerfile index 7cdf707..1721b01 100644 --- a/producer/dockerfile +++ b/producer/dockerfile @@ -1,41 +1,16 @@ ARG AMAZON_LINUX_VERSION_TAG -FROM amazonlinux:${AMAZON_LINUX_VERSION_TAG} as build +FROM amazonlinux:${AMAZON_LINUX_VERSION_TAG} RUN yum install -y \ python3.11 \ python3.11-devel \ - python3.11-pip \ - shadow-utils + python3.11-pip -RUN adduser app -ENV HOME=/home/app -WORKDIR ${HOME} - -RUN chown -R app:app /home/app - -USER app +WORKDIR /app COPY requirements.txt . -RUN python3.11 -m pip install --user --no-warn-script-location -r requirements.txt - -FROM amazonlinux:2023.2.20231026.0 as runtime - -RUN yum install -y \ - python3.11 \ - python3.11-pip \ - shadow-utils - -RUN adduser app -WORKDIR /home/app +RUN python3.11 -m pip install -r requirements.txt COPY src ./src/ -RUN chown -R app:app /home/app - -USER app -ENV HOME=/home/app -WORKDIR ${HOME} - -COPY --from=build /home/app/.local /home/app/.local - CMD python3.11 -m src.deployments.script.main diff --git a/producer/src/adapters/publish_filenames/rabbitmq.py b/producer/src/adapters/publish_filenames/rabbitmq.py index a192e21..8f1346c 100644 --- a/producer/src/adapters/publish_filenames/rabbitmq.py +++ b/producer/src/adapters/publish_filenames/rabbitmq.py @@ -16,12 +16,14 @@ def __init__( port: int, credentials_service: Callable[[], tuple[str, str]], queue: str = "filenames", + socket_timeout: int = 86400, ) -> None: self._host = host self._port = port self._credentials_service = credentials_service self._queue = queue self._conn: Optional[Connection] = None + self._socket_timeout = socket_timeout @overload def publish(self, filename: str) -> bool: @@ -49,6 +51,7 @@ def _get_amqp_conn(self) -> Iterator[pika.BaseConnection]: host=self._host, port=self._port, credentials=credentials, + socket_timeout=self._socket_timeout, ) self._conn = pika.BlockingConnection(conn_parameters) yield self._conn diff --git a/producer/src/deployments/script/config.py b/producer/src/deployments/script/config.py index 3fdd49b..c98681d 100644 --- a/producer/src/deployments/script/config.py +++ b/producer/src/deployments/script/config.py @@ -23,3 +23,4 @@ class RabbitMQConfig: USERNAME = os.getenv("RABBITMQ_USERNAME", "guest") PASSWORD = os.getenv("RABBITMQ_PASSWORD", "guest") QUEUE = os.getenv("RABBITMQ_QUEUE_NAME", "filenames") + SOCKET_TIMEOUT = int(os.getenv("RABBITMQ_SOCKET_TIMEOUT", 86400)) diff --git a/producer/src/deployments/script/main.py b/producer/src/deployments/script/main.py index 10bea4e..ccf4e08 100644 --- a/producer/src/deployments/script/main.py +++ b/producer/src/deployments/script/main.py @@ -23,6 +23,7 @@ def main() -> None: port=RabbitMQConfig.PORT, credentials_service=lambda: (RabbitMQConfig.USERNAME, RabbitMQConfig.PASSWORD), queue=RabbitMQConfig.QUEUE, + 
socket_timeout=RabbitMQConfig.SOCKET_TIMEOUT, ) successes_map = {} diff --git a/producer/src/deployments/script/setup_logging.py b/producer/src/deployments/script/setup_logging.py index e41bc91..57025e8 100644 --- a/producer/src/deployments/script/setup_logging.py +++ b/producer/src/deployments/script/setup_logging.py @@ -5,9 +5,11 @@ def setup_logging() -> None: - LOG_LEVEL_INT = getattr(logging, LoggingConfig.LOG_LEVEL.upper(), None) + pathlib.Path(LoggingConfig.LOG_DIR).absolute().mkdir(parents=True, exist_ok=True) - pathlib.Path(LoggingConfig.LOG_DIR).mkdir(parents=True, exist_ok=True) + (pathlib.Path(LoggingConfig.LOG_DIR).absolute() / "info.log").touch() + (pathlib.Path(LoggingConfig.LOG_DIR).absolute() / "warning.log").touch() + (pathlib.Path(LoggingConfig.LOG_DIR).absolute() / "error.log").touch() handlers: list[logging.Handler] = [] @@ -20,9 +22,8 @@ def setup_logging() -> None: stream_handler.setLevel(LoggingConfig.LOG_LEVEL) handlers.append(stream_handler) - # if LOG_LEVEL_INT is not None and LOG_LEVEL_INT <= logging.INFO: info_handler = TimedRotatingFileHandler( - filename=f"{LoggingConfig.LOG_DIR}/info.log", + filename=str(pathlib.Path(LoggingConfig.LOG_DIR).absolute() / "info.log"), when=LoggingConfig.LOG_ROTATION, interval=1, backupCount=LoggingConfig.LOG_RETENTION, @@ -35,9 +36,8 @@ def setup_logging() -> None: info_handler.setLevel(logging.INFO) handlers.append(info_handler) - # if LOG_LEVEL_INT is not None and LOG_LEVEL_INT <= logging.WARNING: warning_handler = TimedRotatingFileHandler( - filename=f"{LoggingConfig.LOG_DIR}/warning.log", + filename=str(pathlib.Path(LoggingConfig.LOG_DIR).absolute() / "warning.log"), when=LoggingConfig.LOG_ROTATION, interval=1, backupCount=LoggingConfig.LOG_RETENTION, @@ -50,9 +50,8 @@ def setup_logging() -> None: warning_handler.setLevel(logging.WARNING) handlers.append(warning_handler) - # if LOG_LEVEL_INT is not None and LOG_LEVEL_INT <= logging.ERROR: error_handler = TimedRotatingFileHandler( - filename=f"{LoggingConfig.LOG_DIR}/error.log", + filename=str(pathlib.Path(LoggingConfig.LOG_DIR).absolute() / "error.log"), when=LoggingConfig.LOG_ROTATION, interval=1, backupCount=LoggingConfig.LOG_RETENTION, diff --git a/test_generator.py b/test_generator.py index 2d54770..4ad8944 100644 --- a/test_generator.py +++ b/test_generator.py @@ -4,13 +4,17 @@ import random from zoneinfo import ZoneInfo from pathlib import Path -from uuid import uuid4 +import uuid import logging -from tqdm.auto import tqdm from concurrent.futures import ThreadPoolExecutor, as_completed from io import StringIO import os +rng = random.Random() +rng.seed(42) + +uuid.uuid4 = lambda: uuid.UUID(int=rng.getrandbits(128)) + logging.basicConfig(level=logging.INFO) @@ -83,7 +87,7 @@ def generate_data( writer.writerows( [ { - "record_time": date.isoformat(), + "record_time": date.isoformat(timespec="milliseconds"), "sensor_id": sensor_id, "value": random_value, } @@ -109,23 +113,22 @@ def main( base_dir = Path(dir) base_dir.mkdir(exist_ok=True) futures = [] - with tqdm(total=num_sensors) as pbar: - with ThreadPoolExecutor(max_workers=os.cpu_count() * 2) as executor: - for i in range(num_sensors): - sensor_id = f"{uuid4().hex[:8]}_{i}" - futures.append( - executor.submit( - generate_data, - sensor_id, - num_records, - record_interval, - start_date, - base_dir, - ) + with ThreadPoolExecutor(max_workers=os.cpu_count() * 2) as executor: + for i in range(num_sensors): + sensor_id = f"{uuid.uuid4().hex[:8]}_{i}" + futures.append( + executor.submit( + generate_data, + sensor_id, + 
num_records, + record_interval, + start_date, + base_dir, ) + ) - for _ in as_completed(futures): - pbar.update(1) + for _ in as_completed(futures): + pass logging.info("Done")
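The patches above describe the consumer as a short-polling RabbitMQ client (`basic_get` on the `filenames` queue) that parses each CSV file and batch-upserts the rows into the `records` table. The following is a minimal, self-contained sketch of that flow for orientation only; it is not the repository's adapter code. The hosts, credentials, queue name, and batch size are placeholder values mirroring the .env defaults, and logging, retries, and the polling/socket timeouts are omitted.

```python
# Illustrative sketch only -- not part of the patch series above.
import csv
from collections.abc import Iterator

import pika
import psycopg2
from psycopg2.extras import execute_values

UPSERT_SQL = """
    INSERT INTO records (record_time, sensor_id, value)
    VALUES %s
    ON CONFLICT (record_time, sensor_id) DO UPDATE SET value = EXCLUDED.value
"""


def rows_from_csv(filename: str) -> Iterator[tuple[str, str, str]]:
    with open(filename) as f:
        reader = csv.reader(f, delimiter=",")
        next(reader)  # skip the header row written by test_generator.py
        for record_time, sensor_id, value in reader:
            yield record_time, sensor_id, value


def consume_once(channel, pg_conn, queue: str = "filenames", batch_size: int = 5000) -> None:
    # Short polling: basic_get returns (None, None, None) when the queue is empty.
    method, _properties, body = channel.basic_get(queue=queue, auto_ack=False)
    if method is None:
        return
    filename = body.decode()
    with pg_conn.cursor() as cur:
        buffer: list[tuple[str, str, str]] = []
        for row in rows_from_csv(filename):
            buffer.append(row)
            if len(buffer) >= batch_size:
                execute_values(cur, UPSERT_SQL, buffer)
                buffer.clear()
        if buffer:
            execute_values(cur, UPSERT_SQL, buffer)
    pg_conn.commit()
    # Ack only after the whole file has been upserted, so an unprocessed
    # filename is redelivered if the consumer dies mid-file.
    channel.basic_ack(delivery_tag=method.delivery_tag)


if __name__ == "__main__":
    # Placeholder connection details; the real services read them from .env.
    amqp = pika.BlockingConnection(
        pika.ConnectionParameters(
            host="localhost",
            port=5672,
            credentials=pika.PlainCredentials("rabbitmq", "rabbitmq"),
        )
    )
    channel = amqp.channel()
    channel.queue_declare(queue="filenames", durable=True)
    pg_conn = psycopg2.connect(
        host="localhost", port=5432, user="postgres", password="postgres", dbname="records"
    )
    consume_once(channel, pg_conn)
    amqp.close()
    pg_conn.close()
```

In this sketch the `batch_size` argument plays the role of `POSTGRES_BATCH_UPSERT_SIZE` in the compose environment, and acknowledging only after the upsert commits is what lets a replacement consumer pick up a filename that a crashed one never finished.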