Implementing text annotations - close #60 (#134)

py-pdf · Apr 21, 2021 · d4504c6 · d4504c6
1 parent f78f685
commit d4504c6
Show file tree

Hide file tree

Showing 11 changed files with 167 additions and 87 deletions.
diff --git a/.github/workflows/continuous-integration-workflow.yml b/.github/workflows/continuous-integration-workflow.yml
@@ -49,7 +49,7 @@ jobs:
           # Ensuring there is no `generate=True` left remaining in calls to assert_pdf_equal:
           grep -IRF generate=True test/ && exit 1
           # Executing all tests:
-          pytest -vv
+          RUN_NETWORK_TESTS=1 pytest -vv
           # Uploading coverage report to codecov.io
           bash <(curl -s https://codecov.io/bash)
       - name: Generating HTML documentation 🏗️

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,6 +10,7 @@ and [PEP 440](https://www.python.org/dev/peps/pep-0440/).
 ## [2.3.3] - not released yet
 ### Added
 - new features: **document outline & table of contents**! Check out the new dedicated [documentation page](https://pyfpdf.github.io/fpdf2/DocumentOutlineAndTableOfContents.html) for more information
+- new method `FPDF.text_annotation` to insert text annotations
 - `FPDF.image` now also accepts an `io.BytesIO` as input
 ### Fixed
 - `HTMLMixin` / `HTML2FPDF`: properly handling `<img>` inside `<td>` & allowing to center them horizontally

diff --git a/README.md b/README.md
@@ -60,6 +60,8 @@ Features:
  * Generate [Code 39](https://fr.wikipedia.org/wiki/Code_39) & [Interleaved 2 of 5](https://en.wikipedia.org/wiki/Interleaved_2_of_5) barcodes
  * Cell / multi-cell / plaintext writing, automatic page breaks
  * Basic conversion from HTML to PDF
+ * Alternative descriptions for images & links
+ * Table of contents & [document outline](https://pyfpdf.github.io/fpdf2/DocumentOutlineAndTableOfContents.html)
  * Clean error handling through exceptions
  * Only **one** dependency so far: [Pillow](https://pillow.readthedocs.io/en/stable/)
  * Unit tests with `qpdf`-based PDF diffing

diff --git a/docs/Images.md b/docs/Images.md
@@ -0,0 +1,57 @@
+# Images #
+
+When rendering an image, its size on the page can be specified in several ways:
+
+* explicit width and height (expressed in user units)
+* one explicit dimension, the other being calculated automatically in order to keep the original proportions
+* no explicit dimension, in which case the image is put at 72 dpi
+
+Note that if an image is displayed several times, only one copy is embedded in the file.
+
+
+## Simple example ##
+
+```python
+from fpdf import FPDF
+
+pdf = FPDF()
+pdf.add_page()
+pdf.image("docs/fpdf2-logo.png", x=20, y=60)
+pdf.output("pdf-with-image.pdf")
+```
+
+
+## Alternative description ##
+
+A textual description of the image can be provided, for accessibility purposes:
+
+```python
+pdf.image("docs/fpdf2-logo.png", x=20, y=60, alt_text="Snake logo of the fpdf2 library")
+```
+
+
+## Usage with Pillow ##
+
+You can perform image manipulations using the [Pillow](https://pillow.readthedocs.io/en/stable/) library,
+and easily embed the result:
+
+```python
+from fpdf import FPDF
+from PIL import Image
+
+pdf = FPDF()
+pdf.add_page()
+img = Image.open("docs/fpdf2-logo.png")
+img = img.crop((10, 10, 490, 490)).resize((96, 96), resample=Image.NEAREST)
+pdf.image(img, x=80, y=100)
+pdf.output("pdf-with-image.pdf")
+```
+
+
+## Image URLs ##
+
+URLs to images can be directly passed to the [`image`](fpdf/fpdf.html#fpdf.fpdf.FPDF.image) method:
+
+```python
+pdf.image("https://upload.wikimedia.org/wikipedia/commons/7/70/Example.png")
+```
diff --git a/docs/Links.md b/docs/Links.md
@@ -78,5 +78,15 @@ pdf.cell(w=100, h=10, txt="Internal link to first page", border=1, align="C", li
 pdf.output("internal_link.pdf")
 ```
 
-Similarly, `FPDF.link` can be instead of `FPDF.cell`,
+Similarly, `FPDF.link` can be used instead of `FPDF.cell`,
 however `write_html` does not allow defining internal links.
+
+
+## Alternative description ##
+
+An optional textual description of the link can be provided, for accessibility purposes:
+
+```python
+pdf.link(x=0, y=0, w=width, h=line_height, link="https://github.com/PyFPDF/fpdf2",
+         alt_text="GitHub page for fpdf2")
+```
diff --git a/docs/index.md b/docs/index.md
@@ -23,8 +23,10 @@ This repository is a fork of the library's [original port by Max Pat](http://www
 * PNG, GIF and JPG support (including transparency and alpha channel)
 * Shape, Line Drawing
 * Generate [Code 39](https://fr.wikipedia.org/wiki/Code_39) & [Interleaved 2 of 5](https://en.wikipedia.org/wiki/Interleaved_2_of_5) barcodes
-* Cell/Multi-cell/Plaintext writing, Automatic page breaks
+* Cell / multi-cell / plaintext writing, automatic page breaks
 * Basic conversion from HTML to PDF
+* Alternative descriptions for images & links
+* Table of contents & [document outline](https://pyfpdf.github.io/fpdf2/DocumentOutlineAndTableOfContents.html)
 * Clean error handling through exceptions
 * Only **one** dependency so far: [Pillow](https://pillow.readthedocs.io/en/stable/)
 * Unit tests with `qpdf`-based PDF diffing

diff --git a/fpdf/fpdf.py b/fpdf/fpdf.py
@@ -25,6 +25,7 @@
 import sys
 import warnings
 import zlib
+from collections import defaultdict
 from collections.abc import Sequence
 from contextlib import contextmanager
 from datetime import datetime
@@ -89,12 +90,14 @@ class DocumentState(IntEnum):
     CLOSED = 3  # EOF printed
 
 
-class PageLink(NamedTuple):
+class Annotation(NamedTuple):
+    type: str
     x: int
     y: int
     width: int
     height: int
-    link: str
+    contents: str = None
+    link: Union[str, int] = None
     alt_text: Optional[str] = None
 
 
@@ -205,7 +208,7 @@ def __init__(
         self.font_files = {}  # array of font files
         self.diffs = {}  # array of encoding differences
         self.images = {}  # array of used images
-        self.page_links = {}  # array of PageLink
+        self.annots = defaultdict(list)  # map page numbers to arrays of Annotations
         self.links = {}  # array of InternalLink
         self.in_footer = 0  # flag set when processing footer
         self.lasth = 0  # height of last cell printed
@@ -1025,7 +1028,7 @@ def set_link(self, link, y=0, page=-1):
 
     def link(self, x, y, w, h, link, alt_text=None):
         """
-        Puts a link on a rectangular area of the page.
+        Puts a link annotation on a rectangular area of the page.
         Text or image links are generally put via [cell](#fpdf.FPDF.cell),
         [write](#fpdf.FPDF.write) or [image](#fpdf.FPDF.image),
         but this method can be useful for instance to define a clickable area inside an image.
@@ -1035,19 +1038,40 @@ def link(self, x, y, w, h, link, alt_text=None):
             y (int): vertical position (from the top) to the bottom side of the link rectangle
             w (int): width of the link rectangle
             h (int): height of the link rectangle
-            link (str): either an URL or a integer returned by `add_link`, defining an internal link to a page
+            link: either a URL or an integer returned by `add_link`, defining an internal link to a page
             alt_text (str): optional textual description of the link, for accessibility purposes
         """
-        if self.page not in self.page_links:
-            self.page_links[self.page] = []
-        self.page_links[self.page].append(
-            PageLink(
+        self.annots[self.page].append(
+            Annotation(
+                "Link",
                 x * self.k,
                 self.h_pt - y * self.k,
                 w * self.k,
                 h * self.k,
-                link,
-                alt_text,
+                link=link,
+                alt_text=alt_text,
+            )
+        )
+
+    def text_annotation(self, x, y, w, h, text):
+        """
+        Puts a text annotation on a rectangular area of the page.
+
+        Args:
+            x (int): horizontal position (from the left) to the left side of the annotation rectangle
+            y (int): vertical position (from the top) to the bottom side of the annotation rectangle
+            w (int): width of the annotation rectangle
+            h (int): height of the annotation rectangle
+            text (str): text to display
+        """
+        self.annots[self.page].append(
+            Annotation(
+                "Text",
+                x * self.k,
+                self.h_pt - y * self.k,
+                w * self.k,
+                h * self.k,
+                contents=text,
             )
         )
 
@@ -1171,7 +1195,7 @@ def cell(self, w, h=0, txt="", border=0, ln=0, align="", fill=False, link=""):
                 `R`: right align
             fill (bool): Indicates if the cell background must be painted (`True`)
                 or transparent (`False`). Default value: False.
-            link (str): optional link to add on the image, internal
+            link (str): optional link to add on the cell, internal
                 (identifier returned by `add_link`) or external URL.
 
         Returns: a boolean indicating if page break was triggered
@@ -1362,7 +1386,7 @@ def multi_cell(
                 or transparent (`False`). Default value: False.
             split_only (bool): if `True`, does not output anything, only perform
                 word-wrapping and return the resulting multi-lines array of strings.
-            link (str): optional link to add on the image, internal
+            link (str): optional link to add on the cell, internal
                 (identifier returned by `add_link`) or external URL.
             ln (int): Indicates where the current position should go after the call.
                 Possible values are: `0`: to the bottom right ; `1`: to the beginning
@@ -1668,8 +1692,8 @@ def image(
         * when using an animated GIF, only the first frame is used.
 
         Args:
-            name: either a string representing a file path to an image, an io.BytesIO,
-                or a instance of `PIL.Image.Image`
+            name: either a string representing a file path to an image, a URL to an image,
+                an io.BytesIO, or an instance of `PIL.Image.Image`
             x (int): optional horizontal position where to put the image on the page.
                 If not specified or equal to None, the current abscissa is used.
             y (int): optional vertical position where to put the image on the page.
@@ -1851,10 +1875,6 @@ def _putpages(self):
             dh_pt = self.dw_pt
         filter = "/Filter /FlateDecode " if self.compress else ""
         for n in range(1, nb + 1):
-            # page object from pages[n]
-            # page object from pages[n]#w_pt
-            # page object from pages[n]#h_pt
-            # page object from page_links[n] if page_links and page_links[n]
             # Page
             self._newobj()
             self._out("<</Type /Page")
@@ -1865,47 +1885,52 @@ def _putpages(self):
                 self._out(f"/MediaBox [0 0 {w_pt:.2f} {h_pt:.2f}]")
             self._out("/Resources 2 0 R")
 
-            if self.page_links and n in self.page_links:
-                # Links
-                annots = "/Annots ["
-                for pl in self.page_links[n]:
+            page_annots = self.annots[n]
+            if page_annots:  # Annotations, e.g. links:
+                annots = ""
+                for annot in page_annots:
                     # /Rect is given by two diagonally opposite corners of the annotation area
                     rect = (
-                        f"{pl.x:.2f} {pl.y:.2f} "
-                        f"{pl.x + pl.width:.2f} {pl.y - pl.height:.2f}"
+                        f"{annot.x:.2f} {annot.y:.2f} "
+                        f"{annot.x + annot.width:.2f} {annot.y - annot.height:.2f}"
                     )
 
                     # start the annotation entry
                     annots += (
-                        f"<</Type /Annot /Subtype /Link "
+                        f"<</Type /Annot /Subtype /{annot.type} "
                         f"/Rect [{rect}] /Border [0 0 0] "
                         # Flag "Print" (bit position 3) specifies to print
                         # the annotation when the page is printed.
                         # cf. https://docs.verapdf.org/validation/pdfa-part1/#rule-653-2
                         f"/F 4"
                     )
 
-                    if pl.alt_text is not None:
+                    if annot.contents:
+                        annots += f"/Contents {enclose_in_parens(annot.contents)}"
+
+                    if annot.alt_text is not None:
                         # Note: the spec indicates that a /StructParent could be added **inside* this /Annot,
                         # but tests with Adobe Acrobat Reader reveal that the page /StructParents inserted below
                         # is enough to link the marked content in the hierarchy tree with this annotation link.
                         self._add_marked_content(
-                            self.n, struct_type="/Link", alt_text=pl.alt_text
+                            self.n, struct_type="/Link", alt_text=annot.alt_text
                         )
 
-                    # HTML ending of annotation entry
-                    if isinstance(pl.link, str):
-                        annots += f"/A <</S /URI /URI {enclose_in_parens(pl.link)}>>"
-                    else:  # Dest type ending of annotation entry
-                        assert pl.link in self.links, (
-                            f"Page {n} has a link with an invalid index: "
-                            f"{pl.link} (doc #links={len(self.links)})"
-                        )
-                        link = self.links[pl.link]
-                        annots += f"/Dest {link.dest(self)}"
+                    if annot.link:
+                        if isinstance(annot.link, str):
+                            annots += (
+                                f"/A <</S /URI /URI {enclose_in_parens(annot.link)}>>"
+                            )
+                        else:  # Dest type ending of annotation entry
+                            assert annot.link in self.links, (
+                                f"Page {n} has a link with an invalid index: "
+                                f"{annot.link} (doc #links={len(self.links)})"
+                            )
+                            link = self.links[annot.link]
+                            annots += f"/Dest {link.dest(self)}"
                     annots += ">>"
                 # End of the annotations list
-                self._out(f"{annots}]")
+                self._out(f"/Annots [{annots}]")
             if self.pdf_version > "1.3":
                 self._out("/Group <</Type /Group /S /Transparency" "/CS /DeviceRGB>>")
             spid = self._struct_parents_id_per_page.get(self.n)

diff --git a/mkdocs.yml b/mkdocs.yml
@@ -23,6 +23,7 @@ nav:
 - 'Tutorial':                       'Tutorial.md'
 - 'Existing PDFs':                  'ExistingPDFs.md'
 - 'HTML':                           'HTML.md'
+- 'Images':                         'Images.md'
 - 'Links':                          'Links.md'
 - 'Tables':                         'Tables.md'
 - 'Layout':

diff --git a/test/image/test_url_images.py b/test/image/test_url_images.py
@@ -1,3 +1,4 @@
+import os
 from pathlib import Path
 
 import pytest
@@ -8,55 +9,13 @@
 HERE = Path(__file__).resolve().parent
 
 
-@pytest.mark.skip("skip network tests by default")
+@pytest.mark.skipif(
+    "RUN_NETWORK_TESTS" not in os.environ, reason="skip network tests by default"
+)
 def test_png_url(tmp_path):
     pdf = fpdf.FPDF()
     pdf.compress = False
     pdf.add_page()
     png = "https://upload.wikimedia.org/wikipedia/commons/7/70/Example.png"
     pdf.image(png, x=15, y=15, w=30, h=25)
     assert_pdf_equal(pdf, HERE / "image_png_url.pdf", tmp_path)
-
-
-@pytest.mark.skip("skip network tests by default")
-def test_jpg_url(tmp_path):
-    pdf = fpdf.FPDF()
-    pdf.compress = False
-    pdf.add_page()
-    jpg = (
-        "https://upload.wikimedia.org/wikipedia/commons/8/8c/"
-        "JPEG_example_JPG_RIP_025.jpg"
-    )
-    pdf.image(jpg, x=15, y=15)
-    assert_pdf_equal(pdf, HERE / "image_jpg_url.pdf", tmp_path)
-
-
-## Code used to create test:
-# pdf = fpdf.FPDF()
-# pdf.compress = False
-# pdf.add_page()
-# png = "https://upload.wikimedia.org/wikipedia/commons/7/70/Example.png"
-# pdf.image(png, x = 15, y = 15, w = 30, h = 25)
-
-# test = relative_path_to('output.pdf')
-
-# set_doc_date_0(pdf)
-# pdf.output(test)
-
-# print(calculate_hash_of_file(test))
-# os.unlink(test)
-
-# pdf = fpdf.FPDF()
-# pdf.compress = False
-# pdf.add_page()
-# jpg = ("https://upload.wikimedia.org/wikipedia/commons/8/8c/"
-#        "JPEG_example_JPG_RIP_025.jpg")
-# pdf.image(jpg, x = 15, y = 15)
-
-# test = relative_path_to('output.pdf')
-
-# set_doc_date_0(pdf)
-# pdf.output(test)
-
-# print(calculate_hash_of_file(test))
-# os.unlink(test)
diff --git a/test/simple_text_annotation.pdf b/test/simple_text_annotation.pdf