From 3c7363f0503aa68b51906ade21c90e2e1900e328 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 8 Oct 2024 09:58:04 +0000 Subject: [PATCH] Fix type hints and add CHANGELOG Note: the two pyright ignore comments on `content` and `mimetype` are only workarounds for https://github.com/openzim/python-scraperlib/issues/196 and will have to be reverted once that issue is fixed --- CHANGELOG.md | 8 ++++++++ src/warc2zim/items.py | 2 +- tests/test_warc_to_zim.py | 8 ++++++-- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aaec197..77f29ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Enrich test website with img srcset situations (in preparation for #403) + +### Fixed + +- HTML document can be retrieved as `fetch` resource type (#405) + ## [2.1.1] - 2024-09-05 ### Changed diff --git a/src/warc2zim/items.py b/src/warc2zim/items.py index 83dae05..891cfb6 100644 --- a/src/warc2zim/items.py +++ b/src/warc2zim/items.py @@ -9,7 +9,7 @@ from pathlib import Path from jinja2.environment import Template -from libzim.writer import Hint # pyright: ignore[reportMissingImports] +from libzim.writer import Hint # pyright: ignore[reportMissingModuleSource] from warcio.recordloader import ArcWarcRecord from zimscraperlib.types import get_mime_for_name from zimscraperlib.zim.items import StaticItem diff --git a/tests/test_warc_to_zim.py b/tests/test_warc_to_zim.py index 4bfcf75..8e23392 100644 --- a/tests/test_warc_to_zim.py +++ b/tests/test_warc_to_zim.py @@ -202,11 +202,15 @@ def verify_warc_and_zim(self, warcfile, zimfile): elif record.rec_type == "response": # We must have a payload assert payload - payload_content = payload.content.tobytes() + payload_content = ( + payload.content.tobytes() # pyright:ignore[reportAttributeAccessIssue] + ) # if HTML, still need to account for the head insert, otherwise should # have exact match - if 
payload.mimetype.startswith("text/html"): + if payload.mimetype.startswith( # pyright:ignore[reportAttributeAccessIssue] + "text/html" + ): assert head_insert in payload_content elif record.rec_type == "resource": # we do not want to embed resources "as-is"