Skip to content

Commit

Permalink
Update rapid-latex-ocr
Browse files Browse the repository at this point in the history
  • Loading branch information
ali6parmak committed Nov 29, 2024
1 parent ac8d850 commit 2ecdd5d
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 7 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ Run the service:

- With GPU support:
```
-docker run --rm --name pdf-document-layout-analysis --gpus '"device=0"' -p 5060:5060 --entrypoint ./start.sh huridocs/pdf-document-layout-analysis:v0.0.19
+docker run --rm --name pdf-document-layout-analysis --gpus '"device=0"' -p 5060:5060 --entrypoint ./start.sh huridocs/pdf-document-layout-analysis:v0.0.20
```

- Without GPU support:
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "pdf-document-layout-analysis"
-version = "2024.11.12.1"
+version = "2024.11.29.1"
description = "This tool is for PDF document layout analysis"
license = { file = "LICENSE" }
authors = [{ name = "HURIDOCS" }]
Expand Down Expand Up @@ -29,7 +29,7 @@ dependencies = [
"roman==4.2",
"hydra-core==1.3.2",
"pypandoc==1.13",
-"rapid-latex-ocr==0.0.8",
+"rapid-latex-ocr==0.0.9",
"struct_eqtable @ git+https://github.com/UniModal4Reasoning/StructEqTable-Deploy.git@fd06078bfa9364849eb39330c075dd63cbed73ff"
]

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,5 @@ setuptools==75.4.0
roman==4.2
hydra-core==1.3.2
pypandoc==1.13
-rapid-latex-ocr==0.0.8
+rapid-latex-ocr==0.0.9
git+https://github.com/UniModal4Reasoning/StructEqTable-Deploy.git@fd06078bfa9364849eb39330c075dd63cbed73ff
6 changes: 3 additions & 3 deletions src/extraction_formats/extract_formula_formats.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import io
from PIL.Image import Image
-from rapid_latex_ocr import LatexOCR
+from rapid_latex_ocr import LaTeXOCR
from data_model.PdfImages import PdfImages
from fast_trainer.PdfSegment import PdfSegment
from pdf_token_type_labels.TokenType import TokenType
Expand All @@ -10,7 +10,7 @@ def has_arabic(text: str) -> bool:
return any("\u0600" <= char <= "\u06FF" or "\u0750" <= char <= "\u077F" for char in text)


-def get_latex_format(model: LatexOCR, formula_image: Image):
+def get_latex_format(model: LaTeXOCR, formula_image: Image):
buffer = io.BytesIO()
formula_image.save(buffer, format="jpeg")
image_bytes = buffer.getvalue()
Expand All @@ -25,7 +25,7 @@ def extract_formula_format(pdf_images: PdfImages, predicted_segments: list[PdfSe
if not formula_segments:
return

-model = LatexOCR()
+model = LaTeXOCR()

for index, formula_segment in formula_segments:
if has_arabic(formula_segment.text_content):
Expand Down

0 comments on commit 2ecdd5d

Please sign in to comment.