From 649d97ae2bf4be0d7dc6651cdfb9c9b02a3c5952 Mon Sep 17 00:00:00 2001 From: Gabo Date: Mon, 12 Feb 2024 14:51:11 +0100 Subject: [PATCH] Handle PDF conversion errors --- src/QueueProcessor.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/QueueProcessor.py b/src/QueueProcessor.py index 6536a9a..2540887 100644 --- a/src/QueueProcessor.py +++ b/src/QueueProcessor.py @@ -50,17 +50,7 @@ def process(self, id, message, rc, ts): extraction_data = extract_paragraphs_asynchronous(task) if not extraction_data: - extraction_message = ExtractionMessage( - tenant=task.tenant, - task=task.task, - params=task.params, - success=False, - error_message="Error getting the xml from the pdf", - ) - - self.results_queue.sendMessage().message(extraction_message.model_dump_json()).execute() - self.logger.error(extraction_message.model_dump_json()) - return True + raise FileNotFoundError service_url = f"{config.SERVICE_HOST}:{config.SERVICE_PORT}" results_url = f"{service_url}/get_paragraphs/{task.tenant}/{task.params.filename}" @@ -78,10 +68,23 @@ def process(self, id, message, rc, ts): self.pdf_paragraph_db.paragraphs.insert_one(json.loads(extraction_data_json)) self.logger.info(f"Results Redis message: {extraction_message}") self.results_queue.sendMessage(delay=5).message(extraction_message.model_dump_json()).execute() - return True + + except FileNotFoundError: + extraction_message = ExtractionMessage( + tenant=task.tenant, + task=task.task, + params=task.params, + success=False, + error_message="Error getting the xml from the pdf", + ) + + self.results_queue.sendMessage().message(extraction_message.model_dump_json()).execute() + self.logger.error(extraction_message.model_dump_json()) + except Exception: self.logger.error("error extracting the paragraphs", exc_info=1) - return True + + return True def subscribe_to_extractions_tasks_queue(self): while True: