Skip to content

Commit e27e796

Browse files
authored
Merge pull request #1 from explosion/pre-release-fixes
add unit tests in ci
2 parents b6a31a3 + dad0cc4 commit e27e796

File tree

4 files changed

+59
-5
lines changed

4 files changed

+59
-5
lines changed

.github/workflows/unit_tests.yml

+47
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,47 @@
1+
name: Unit Tests
2+
3+
on:
4+
pull_request:
5+
push:
6+
branches:
7+
- main
8+
9+
jobs:
10+
setup:
11+
runs-on: ubuntu-latest
12+
steps:
13+
- uses: actions/checkout@v3
14+
- name: Set up Python 3.9
15+
uses: actions/setup-python@v4
16+
with:
17+
python-version: 3.9
18+
cache: "pip" # caching pip dependencies
19+
20+
- name: Check out Prodigy
21+
uses: actions/checkout@v3
22+
with:
23+
repository: explosion/prodigy
24+
ref: v1.14.0
25+
path: ./prodigy
26+
ssh-key: ${{ secrets.GHA_PRODIGY_READ }}
27+
28+
- name: Install prodigy
29+
run: |
30+
ls -la
31+
pip install ./prodigy
32+
33+
- name: Install dependencies
34+
run: |
35+
pip install --upgrade pip
36+
pip install -e .
37+
pip install ruff pytest
38+
39+
- name: Run ruff
40+
if: always()
41+
shell: bash
42+
run: python -m ruff prodigy_pdf tests
43+
44+
- name: Run pytest
45+
if: always()
46+
shell: bash
47+
run: python -m pytest tests

prodigy_pdf/__init__.py

+9 -5
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
from typing import Dict, List
1+
from typing import List
22
import base64
33
from io import BytesIO
44
from pathlib import Path
@@ -10,14 +10,16 @@
1010
from prodigy.util import msg
1111

1212
def page_to_image(page: pdfium.PdfPage) -> str:
13+
"""Turns a PdfPage into a base64 image for Prodigy"""
1314
pil_image = page.render().to_pil()
14-
buffered = BytesIO()
15-
pil_image.save(buffered, format="JPEG")
16-
img_str = base64.b64encode(buffered.getvalue())
15+
with BytesIO() as buffered:
16+
pil_image.save(buffered, format="JPEG")
17+
img_str = base64.b64encode(buffered.getvalue())
1718
return f"data:image/png;base64,{img_str.decode('utf-8')}"
1819

1920

20-
def generate_pdf_pages(pdf_paths: List[Path]) -> Dict:
21+
def generate_pdf_pages(pdf_paths: List[Path]):
22+
"""Generate dictionaries that contain an image for each page in the PDF"""
2123
for pdf_path in pdf_paths:
2224
pdf = pdfium.PdfDocument(pdf_path)
2325
n_pages = len(pdf)
@@ -30,6 +32,7 @@ def generate_pdf_pages(pdf_paths: List[Path]) -> Dict:
3032
"pdf": pdf_path.parts[-1],
3133
}
3234
})
35+
pdf.close()
3336

3437

3538
@recipe(
@@ -47,6 +50,7 @@ def pdf_image_manual(
4750
labels:str,
4851
remove_base64:bool=False
4952
) -> ControllerComponentsDict:
53+
"""Turns pdfs into images in order to annotate them."""
5054
# Read in stream as a list for progress bar.
5155
if not pdf_folder.exists():
5256
msg.fail(f"Folder `{pdf_folder}` does not exist.", exits=True)

pyproject.toml

+2
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
[tool.ruff]
2+
line-length = 120

setup.cfg

+1
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@ zip_safe = true
1010
python_requires = >=3.8
1111
install_requires =
1212
pypdfium2==4.20.0
13+
Pillow==9.4.0
1314

1415
[options.entry_points]
1516
prodigy_recipes =

0 commit comments

Comments
 (0)