Skip to content

Commit

Permalink
Updated requirements.
Browse files Browse the repository at this point in the history
  • Loading branch information
vladd-bit committed Mar 23, 2023
1 parent f7911ec commit fb41569
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 12 deletions.
4 changes: 2 additions & 2 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# Tesseract model path
TESSDATA_PREFIX = os.environ.get("TESSDATA_PREFIX", "/usr/local/share/tessdata")

# Integer or Float - duration in seconds for the OCR processing, after which, pytesseract will terminate and raise RuntimeError
# Integer or Float - duration in seconds for the OCR processing, after which tesseract will terminate and raise RuntimeError
TESSERACT_TIMEOUT = os.environ.get("OCR_SERVICE_TESSERACT_TIMEOUT", 30)

# Tesseract language code string. Defaults to eng if not specified! Example for multiple languages: lang='eng+fra'
Expand All @@ -26,7 +26,7 @@
# Integer - modifies the processor priority for the Tesseract run. Not supported on Windows. The nice value adjusts the niceness of processes on Unix-like systems.
TESSERACT_NICE = int(os.environ.get("OCR_SERVICE_TESSERACT_NICE", -18))

# Any additional custom configuration flags that are not available via the pytesseract function. For example: config='--psm 6'
# Any additional custom configuration flags that are not available via the tesseract function. For example: config='--psm 6'
TESSERACT_CUSTOM_CONFIG_FLAGS = os.environ.get("OCR_SERVICE_TESSERACT_CUSTOM_CONFIG_FLAGS", "")

# controls both threads and cpus
Expand Down
2 changes: 1 addition & 1 deletion ocr_service/api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def process() -> Response:
log.info("Processing binary as data-binary, generating temporary file name...")
file_name = uuid.uuid4().hex
log.info("Generated file name:" + file_name)

stream = request.get_data(cache=False, as_text=False, parse_form_data=False)

output_text, doc_metadata = processor.process_stream(stream=stream, file_name=file_name)
Expand Down
2 changes: 2 additions & 0 deletions ocr_service/processor/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ def _preprocess_pdf_to_img(self, stream: bytes) -> List[PILImage]:
"""

self.log.info("pre-processing pdf...")

pdf_image_pages = []

try:
pdf_image_pages, doc_metadata = self._pdf_to_img(stream)
Expand Down
17 changes: 8 additions & 9 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
unoserver==1.2
pytesseract==0.3.10
unoserver==1.3
filetype==1.2.0
opencv-python==4.7.0.68
opencv-python==4.7.0.72
Pillow==9.4.0
html2image==2.0.1
Flask==2.2.2
Werkzeug==2.2.2
html2image==2.0.3
Flask==2.2.3
Werkzeug==2.2.3
injector==0.20.1
gunicorn==20.1.0
tesserocr==2.5.2
pypdfium2==3.18.0
tesserocr==2.6.0
pypdfium2==3.21.1
psutil==5.9.4
uharfbuzz==0.33.0
uharfbuzz==0.35.0
pyxml2pdf==0.3.4

0 comments on commit fb41569

Please sign in to comment.