From f1b16d8bd5b33a1d19e383602e59dbb62f33fc18 Mon Sep 17 00:00:00 2001 From: Shihua Ma Date: Tue, 23 Apr 2024 10:02:17 +0800 Subject: [PATCH 1/2] Update parse.py Fix bug with https://github.com/Filimoa/open-parse/issues/28 --- src/openparse/tables/pymupdf/parse.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/openparse/tables/pymupdf/parse.py b/src/openparse/tables/pymupdf/parse.py index b8699bf..5b11f3d 100644 --- a/src/openparse/tables/pymupdf/parse.py +++ b/src/openparse/tables/pymupdf/parse.py @@ -22,7 +22,10 @@ def output_to_html(headers: List[str], rows: List[List[str]]) -> str: def output_to_markdown(headers: List[str], rows: List[List[str]]) -> str: - markdown_output = "| " + " | ".join(headers) + " |\n" + markdown_output = "" + for header in headers: + markdown_output += "|" + (header or "") + markdown_output += " |\n" markdown_output += "|---" * len(headers) + "|\n" for row in rows: From 64283d024964ceda705b43b2a69038d36b9b9f09 Mon Sep 17 00:00:00 2001 From: Sergey Date: Wed, 24 Apr 2024 08:48:48 -0600 Subject: [PATCH 2/2] fixed minor formatting issues --- pyproject.toml | 2 +- src/openparse/tables/pymupdf/parse.py | 13 +++++++++---- src/openparse/version.py | 2 +- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 64be442..d0d1373 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ name = "openparse" description = "Streamlines the process of preparing documents for LLM's." readme = "README.md" requires-python = ">=3.8" -version = "0.5.3" +version = "0.5.4" authors = [{name = "Sergey Filimonov", email = "hello@sergey.fyi"}] dependencies = [ "PyMuPDF >= 1.23.2", diff --git a/src/openparse/tables/pymupdf/parse.py b/src/openparse/tables/pymupdf/parse.py index 5b11f3d..95cd2d9 100644 --- a/src/openparse/tables/pymupdf/parse.py +++ b/src/openparse/tables/pymupdf/parse.py @@ -23,13 +23,18 @@ def output_to_html(headers: List[str], rows: List[List[str]]) -> str: def output_to_markdown(headers: List[str], rows: List[List[str]]) -> str: markdown_output = "" - for header in headers: - markdown_output += "|" + (header or "") - markdown_output += " |\n" + if headers is not None: + for header in headers: + safe_header = "" if header is None else header + markdown_output += "| " + safe_header + " " + + markdown_output += "|\n" markdown_output += "|---" * len(headers) + "|\n" for row in rows: - processed_row = [" " if cell in [None, ""] else cell.replace("\n", " ") for cell in row] + processed_row = [ + " " if cell in [None, ""] else cell.replace("\n", " ") for cell in row + ] markdown_output += "| " + " | ".join(processed_row) + " |\n" return markdown_output diff --git a/src/openparse/version.py b/src/openparse/version.py index 98a90a8..e894b5d 100644 --- a/src/openparse/version.py +++ b/src/openparse/version.py @@ -1,4 +1,4 @@ -OPEN_PARSE_VERSION = "0.5.3" +OPEN_PARSE_VERSION = "0.5.4" def version_info() -> str: