From a645bd4fa26a44b6900167a9c06370bfd301f41b Mon Sep 17 00:00:00 2001 From: Gabo Date: Fri, 16 Aug 2024 17:54:11 +0200 Subject: [PATCH] Update document layout analysis service --- .github/workflows/push_docker_image.yml | 2 -- README.md | 19 +++---------------- docker-compose-test.yml | 2 +- docker-compose.yml | 2 +- requirements.txt | 2 +- 5 files changed, 6 insertions(+), 21 deletions(-) diff --git a/.github/workflows/push_docker_image.yml b/.github/workflows/push_docker_image.yml index fb9139a..812c1db 100644 --- a/.github/workflows/push_docker_image.yml +++ b/.github/workflows/push_docker_image.yml @@ -2,8 +2,6 @@ name: Create and publish Docker image on: push: - branches: - - 'main' tags: - 'v*' diff --git a/README.md b/README.md index 254d4e5..404c239 100755 --- a/README.md +++ b/README.md @@ -8,25 +8,19 @@ ## Docker containers A redis server is needed to use the service asynchronously. For that matter, it can be used the -command `make start:testing` that has a built-in +command `make start-test` that has a built-in redis server. Containers with `make start` -![Alt logo](readme_pictures/docker_compose_up.png?raw=true "docker-compose up") +Containers with `make start-test` -Containers with `make start:testing` -![Alt logo](readme_pictures/docker_compose_redis.png?raw=true "docker-compose -f docker-compose-service-with-redis.yml up") - - -## How to use it asynchronously +## How to use it 1. Send PDF to extract curl -X POST -F 'file=@/PATH/TO/PDF/pdf_name.pdf' localhost:5051/async_extraction/[tenant_name] -![Alt logo](readme_pictures/send_materials.png?raw=true "Send PDF to extract") - 2. Add extraction task @@ -38,9 +32,6 @@ Python code: message_json = '{"tenant": "tenant_name", "task": "segmentation", "params": {"filename": "pdf_file_name.pdf"}}' message = queue.sendMessage(message_json).exceptions(False).execute() - -![Alt logo](readme_pictures/extraction.png?raw=true "Add extraction task") - 3. Get paragraphs When the segmentation task is done, a message is placed in the results queue: @@ -66,11 +57,8 @@ or in python requests.get(results_message.data_url) requests.get(results_message.file_url) -![Alt logo](readme_pictures/get_paragraphs.png?raw=true "Get paragraphs") - ## HTTP server -![Alt logo](readme_pictures/http_server.png?raw=true "HTTP server") The container `HTTP server` is coded using Python 3.9 and uses the [FastApi](https://fastapi.tiangolo.com/) web framework. @@ -84,7 +72,6 @@ The errors are reported to the file `docker_volume/service.log`, if the configur ## Queue processor -![Alt logo](readme_pictures/queue_processor.png?raw=true "Queue processor") The container `Queue processor` is coded using Python 3.9, and it is on charge of the communication with redis. diff --git a/docker-compose-test.yml b/docker-compose-test.yml index 8cc029b..d7bef59 100755 --- a/docker-compose-test.yml +++ b/docker-compose-test.yml @@ -43,7 +43,7 @@ services: worker-pdf-layout: container_name: "worker-pdf-layout" entrypoint: [ "gunicorn", "-k", "uvicorn.workers.UvicornWorker", "--chdir", "./src", "app:app", "--bind", "0.0.0.0:5060", "--timeout", "10000"] - image: ghcr.io/huridocs/pdf-document-layout-analysis:0.0.7 + image: ghcr.io/huridocs/pdf-document-layout-analysis:0.0.11 init: true restart: unless-stopped ports: diff --git a/docker-compose.yml b/docker-compose.yml index d11be24..3e59d58 100755 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -40,7 +40,7 @@ services: worker-pdf-layout-gpu: container_name: "worker-pdf-layout-gpu" entrypoint: [ "gunicorn", "-k", "uvicorn.workers.UvicornWorker", "--chdir", "./src", "app:app", "--bind", "0.0.0.0:5060", "--timeout", "10000"] - image: ghcr.io/huridocs/pdf-document-layout-analysis:0.0.7 + image: ghcr.io/huridocs/pdf-document-layout-analysis:0.0.11 init: true restart: unless-stopped network_mode: host diff --git a/requirements.txt b/requirements.txt index cfc8d02..fe8a747 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -git+https://github.com/huridocs/pdf-document-layout-analysis@df0e6e0924e8214bf9d050c709e193e93cbf81ff +git+https://github.com/huridocs/pdf-document-layout-analysis@d6cbcc4891391fd9f2fc577c9cef6f9c8f7d9e6f graypy==2.1.0 PyYAML==6.0.1 pymongo==4.8.0