Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move to pandoc for rendering sponsorship contracts #2343

Merged
merged 6 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,18 @@ jobs:
steps:
- name: Check out repository
uses: actions/checkout@v2
- name: Install platform dependencies
run: |
sudo apt -y update
sudo apt -y install --no-install-recommends \
texlive-latex-base \
texlive-latex-recommended \
texlive-plain-generic \
lmodern
- name: Install pandoc
run: |
wget https://github.com/jgm/pandoc/releases/download/2.17.1.1/pandoc-2.17.1.1-1-amd64.deb
sudo dpkg -i pandoc-2.17.1.1-1-amd64.deb
- uses: actions/setup-python@v2
with:
python-version: 3.9.16
Expand Down
Empty file added Aptfile
Empty file.
42 changes: 40 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,9 +1,47 @@
FROM python:3.9-bullseye
FROM python:3.9-bookworm
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1

# By default, Docker has special steps to avoid keeping APT caches in the layers, which
# is good, but in our case, we're going to mount a special cache volume (kept between
# builds), so we WANT the cache to persist.
RUN set -eux; \
rm -f /etc/apt/apt.conf.d/docker-clean; \
echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache;

# Install system-level build requirements. This is done before everything
# else because these rarely change.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
set -x \
&& apt-get update \
&& apt-get install --no-install-recommends -y \
pandoc \
texlive-latex-base \
texlive-latex-recommended \
texlive-fonts-recommended \
texlive-plain-generic \
lmodern

# Install the pinned pandoc release that matches the build architecture.
# Fail fast on architectures with no matching branch instead of continuing
# with an unset ARCH, and remove the downloaded package once it is installed
# so it does not stay in the image layer.
RUN set -eux; \
    case "$(uname -m)" in \
        "x86_64") ARCH=amd64 ;; \
        "aarch64") ARCH=arm64 ;; \
        *) echo "Unsupported architecture: $(uname -m)" >&2; exit 1 ;; \
    esac; \
    wget --quiet "https://github.com/jgm/pandoc/releases/download/2.17.1.1/pandoc-2.17.1.1-1-${ARCH}.deb"; \
    dpkg -i "pandoc-2.17.1.1-1-${ARCH}.deb"; \
    rm -f "pandoc-2.17.1.1-1-${ARCH}.deb"

RUN mkdir /code
WORKDIR /code

COPY dev-requirements.txt /code/
COPY base-requirements.txt /code/
RUN pip install -r dev-requirements.txt

RUN pip --no-cache-dir --disable-pip-version-check install --upgrade pip setuptools wheel

RUN --mount=type=cache,target=/root/.cache/pip \
set -x \
&& pip --disable-pip-version-check \
install \
-r dev-requirements.txt

COPY . /code/
7 changes: 3 additions & 4 deletions base-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,11 @@ django-filter==2.4.0
django-ordered-model==3.4.3
django-widget-tweaks==1.4.8
django-countries==7.2.1
xhtml2pdf==0.2.5
django-easy-pdf3==0.1.2
num2words==0.5.10
django-polymorphic==3.0.0
sorl-thumbnail==12.7.0
docxtpl==0.12.0
reportlab==3.6.6
django-extensions==3.1.4
django-import-export==2.7.1

pypandoc==1.12
panflute==2.3.0
1 change: 0 additions & 1 deletion pydotorg/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,6 @@
'ordered_model',
'widget_tweaks',
'django_countries',
'easy_pdf',
'sorl.thumbnail',

'banners',
Expand Down
89 changes: 89 additions & 0 deletions sponsors/contracts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import os
import tempfile

from django.http import HttpResponse
from django.template.loader import render_to_string
from django.utils.dateformat import format

import pypandoc

# Directory containing this module; the paths below are resolved relative to it.
dirname = os.path.dirname(__file__)
# Path of the pandoc filter script passed to pypandoc when rendering DOCX output.
DOCXPAGEBREAK_FILTER = os.path.join(dirname, "pandoc_filters/pagebreak.py")
# Path of the DOCX file passed to pandoc via ``--reference-doc``.
REFERENCE_DOCX = os.path.join(dirname, "reference.docx")


def _clean_split(text, separator="\n"):
return [
t.replace("-", "").strip()
for t in text.split("\n")
if t.replace("-", "").strip()
]


def _contract_context(contract, **context):
    """Build the template context used to render ``contract``.

    Extra keyword arguments seed the context. The keys assigned here
    overwrite any caller-supplied values of the same name.

    Returns:
        The context dict, including the contract, its sponsorship and
        sponsor, the start date with its "S"-formatted suffix, the cleaned
        benefit and legal-clause lines, and the previous effective date
        (or "UNKNOWN" when it is not set).
    """
    sponsorship = contract.sponsorship
    start_date = sponsorship.start_date

    context["contract"] = contract
    context["start_date"] = start_date
    context["start_day_english_suffix"] = format(start_date, "S")
    context["sponsor"] = sponsorship.sponsor
    context["sponsorship"] = sponsorship
    context["benefits"] = _clean_split(contract.benefits_list.raw)
    context["legal_clauses"] = _clean_split(contract.legal_clauses.raw)

    previous_effective = sponsorship.previous_effective_date
    if previous_effective:
        context["previous_effective"] = previous_effective
        context["previous_effective_english_suffix"] = format(previous_effective, "S")
    else:
        # No previous effective date: both keys fall back to a placeholder.
        context["previous_effective"] = "UNKNOWN"
        context["previous_effective_english_suffix"] = "UNKNOWN"
    return context


def render_markdown_from_template(contract, **context):
    """Render the Markdown source of ``contract`` from its template.

    Renewals are rendered with the renewal-agreement template; every other
    contract uses the sponsorship-agreement template.

    Returns:
        The rendered Markdown as a string.
    """
    if contract.sponsorship.renewal:
        template = "sponsors/admin/contracts/renewal-agreement.md"
    else:
        template = "sponsors/admin/contracts/sponsorship-agreement.md"
    return render_to_string(template, _contract_context(contract, **context))


def render_contract_to_pdf_response(request, contract, **context):
    """Return an ``HttpResponse`` whose body is ``contract`` rendered as PDF.

    ``request`` is accepted but not used by this function.
    """
    pdf_bytes = render_contract_to_pdf_file(contract, **context)
    return HttpResponse(pdf_bytes, content_type="application/pdf")


def render_contract_to_pdf_file(contract, **context):
    """Render ``contract`` to PDF via pandoc and return the PDF bytes.

    The Markdown is rendered first, then converted by pypandoc into a
    temporary ``.pdf`` file. The file is read back and removed automatically
    when the ``with`` block exits.

    Returns:
        The contents of the generated PDF as ``bytes``.
    """
    markdown = render_markdown_from_template(contract, **context)
    # Only one temporary file is needed: the previous extra (unused) temp
    # file and the unused return value of ``convert_text`` are dropped.
    with tempfile.NamedTemporaryFile(suffix=".pdf") as pdf_file:
        pypandoc.convert_text(
            markdown, "pdf", outputfile=pdf_file.name, format="md"
        )
        return pdf_file.read()


def render_contract_to_docx_response(request, contract, **context):
    """Return an ``HttpResponse`` serving ``contract`` as a DOCX attachment.

    The download filename is built from a renewal/contract prefix and the
    sponsor name with spaces replaced by dashes and dots removed.
    ``request`` is accepted but not used by this function.
    """
    docx_bytes = render_contract_to_docx_file(contract, **context)
    response = HttpResponse(
        docx_bytes,
        content_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    )

    sponsorship = contract.sponsorship
    prefix = "sponsorship-renewal" if sponsorship.renewal else "sponsorship-contract"
    sponsor_part = sponsorship.sponsor.name.replace(" ", "-").replace(".", "")
    response["Content-Disposition"] = f"attachment; filename={prefix}-{sponsor_part}.docx"
    return response


def render_contract_to_docx_file(contract, **context):
    """Render ``contract`` to DOCX via pandoc and return the DOCX bytes.

    The Markdown is converted by pypandoc into a temporary file using the
    page-break filter and the reference document configured at module level.
    The file is read back and removed when the ``with`` block exits.

    Returns:
        The contents of the generated DOCX as ``bytes``.
    """
    markdown = render_markdown_from_template(contract, **context)
    with tempfile.NamedTemporaryFile() as docx_file:
        # The return value of ``convert_text`` is not needed here, so it is
        # no longer bound to an unused local.
        pypandoc.convert_text(
            markdown,
            "docx",
            outputfile=docx_file.name,
            format="md",
            filters=[DOCXPAGEBREAK_FILTER],
            extra_args=["--reference-doc", REFERENCE_DOCX],
        )
        return docx_file.read()
Empty file.
90 changes: 90 additions & 0 deletions sponsors/pandoc_filters/pagebreak.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# ------------------------------------------------------------------------------
# Source: https://github.com/pandocker/pandoc-docx-pagebreak-py/
# Revision: c8cddccebb78af75168da000a3d6ac09349bef73
# ------------------------------------------------------------------------------
# MIT License
#
# Copyright (c) 2018 pandocker
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# ------------------------------------------------------------------------------

""" pandoc-docx-pagebreakpy
Pandoc filter to insert pagebreak as openxml RawBlock
Only for docx output

Trying to port pandoc-doc-pagebreak
- https://github.com/alexstoick/pandoc-docx-pagebreak
"""

import panflute as pf


class DocxPagebreak(object):
    """Panflute filter action that swaps marker raw blocks for OpenXML blocks.

    ``\\newpage`` and ``\\toc`` raw blocks are rewritten only when the output
    format is ``docx``.
    """

    # OpenXML paragraph containing a page break.
    pagebreak = pf.RawBlock("<w:p><w:r><w:br w:type=\"page\" /></w:r></w:p>", format="openxml")
    # OpenXML section break; only referenced by the commented-out branch below.
    sectionbreak = pf.RawBlock("<w:p><w:pPr><w:sectPr><w:type w:val=\"nextPage\" /></w:sectPr></w:pPr></w:p>",
                               format="openxml")
    # OpenXML structured document tag holding a TOC field instruction.
    toc = pf.RawBlock(r"""
<w:sdt>
<w:sdtContent xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:p>
<w:r>
<w:fldChar w:fldCharType="begin" w:dirty="true" />
<w:instrText xml:space="preserve">TOC \o "1-3" \h \z \u</w:instrText>
<w:fldChar w:fldCharType="separate" />
<w:fldChar w:fldCharType="end" />
</w:r>
</w:p>
</w:sdtContent>
</w:sdt>
""", format="openxml")

    def action(self, elem, doc):
        """Return the replacement for ``elem``, or ``elem`` itself if untouched.

        Only ``pf.RawBlock`` elements are inspected:

        * ``\\newpage`` becomes the page-break block when ``doc.format`` is
          ``"docx"``; otherwise the element is returned unchanged.
        * ``\\toc`` becomes a "Table of Contents" heading div followed by the
          TOC block when ``doc.format`` is ``"docx"``; for any other format an
          empty list is returned (presumably so panflute drops the element --
          verify against the panflute documentation).
        """
        if isinstance(elem, pf.RawBlock):
            if elem.text == r"\newpage":
                if (doc.format == "docx"):
                    pf.debug("Page Break")
                    elem = self.pagebreak
            # elif elem.text == r"\newsection":
            #     if (doc.format == "docx"):
            #         pf.debug("Section Break")
            #         elem = self.sectionbreak
            #     else:
            #         elem = []
            elif elem.text == r"\toc":
                if (doc.format == "docx"):
                    pf.debug("Table of Contents")
                    # Heading paragraph wrapped in a div carrying the
                    # "TOC Heading" custom style.
                    para = [pf.Para(pf.Str("Table"), pf.Space(), pf.Str("of"), pf.Space(), pf.Str("Contents"))]
                    div = pf.Div(*para, attributes={"custom-style": "TOC Heading"})
                    elem = [div, self.toc]
                else:
                    elem = []
        return elem


def main(doc=None):
    """Run the page-break filter through panflute and return its result.

    Args:
        doc: Optional document forwarded to ``pf.run_filter``.
    """
    return pf.run_filter(DocxPagebreak().action, doc=doc)


# Run the filter when this file is executed directly as a script.
if __name__ == "__main__":
    main()
78 changes: 0 additions & 78 deletions sponsors/pdf.py

This file was deleted.

Binary file added sponsors/reference.docx
Binary file not shown.
39 changes: 39 additions & 0 deletions sponsors/tests/test_contracts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from datetime import date
from model_bakery import baker
from unittest.mock import patch, Mock

from django.http import HttpRequest
from django.test import TestCase
from django.utils.dateformat import format

from sponsors.contracts import render_contract_to_docx_response


class TestRenderContract(TestCase):
    """Checks the response headers produced when rendering a contract as DOCX."""

    def setUp(self):
        self.contract = baker.make_recipe(
            "sponsors.tests.empty_contract",
            sponsorship__start_date=date.today(),
        )

    def _assert_docx_headers(self, renewal, expected_disposition):
        """Render the contract with the given renewal flag and check both headers."""
        request = Mock(HttpRequest)
        self.contract.sponsorship.renewal = renewal
        response = render_contract_to_docx_response(request, self.contract)

        self.assertEqual(response.get("Content-Disposition"), expected_disposition)
        self.assertEqual(
            response.get("Content-Type"),
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        )

    def test_render_response_with_docx_attachment(self):
        # A regular (non-renewal) contract uses the "sponsorship-contract" prefix.
        self._assert_docx_headers(
            False,
            "attachment; filename=sponsorship-contract-Sponsor.docx",
        )

    def test_render_renewal_response_with_docx_attachment(self):
        # A renewal uses the "sponsorship-renewal" prefix.
        self._assert_docx_headers(
            True,
            "attachment; filename=sponsorship-renewal-Sponsor.docx",
        )
Loading
Loading