diff --git a/CHANGELOG.md b/CHANGELOG.md index aaec1973..77f29ca9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Enrich test website with img srcset situations (in preparation for #403) + +### Fixed + +- HTML document can be retrieved as `fetch` resource type (#405) + ## [2.1.1] - 2024-09-05 ### Changed diff --git a/src/warc2zim/content_rewriting/generic.py b/src/warc2zim/content_rewriting/generic.py index 4f41668e..cd13804f 100644 --- a/src/warc2zim/content_rewriting/generic.py +++ b/src/warc2zim/content_rewriting/generic.py @@ -154,7 +154,7 @@ def get_rewrite_mode(self, record, mimetype): def get_resourcetype_rewrite_mode(self, record, resourcetype, mimetype): """Get current record rewrite mode based on WARC-Resource-Type and mimetype""" - if resourcetype in ["document", "xhr"] and mimetype == "text/html": + if resourcetype in ["document", "xhr", "fetch"] and mimetype == "text/html": # TODO : Handle header "Accept" == "application/json" if getattr(record, "method", "GET") == "GET": return "html" diff --git a/src/warc2zim/items.py b/src/warc2zim/items.py index 83dae055..891cfb65 100644 --- a/src/warc2zim/items.py +++ b/src/warc2zim/items.py @@ -9,7 +9,7 @@ from pathlib import Path from jinja2.environment import Template -from libzim.writer import Hint # pyright: ignore[reportMissingImports] +from libzim.writer import Hint # pyright: ignore[reportMissingModuleSource] from warcio.recordloader import ArcWarcRecord from zimscraperlib.types import get_mime_for_name from zimscraperlib.zim.items import StaticItem diff --git a/test-website/content/image-srcset.html b/test-website/content/image-srcset.html new file mode 100644 index 00000000..f1005d4e --- /dev/null +++ b/test-website/content/image-srcset.html @@ -0,0 +1,102 @@ + + + + + Test website + + + + + + + + +

+ This page contains tests around varying images based on screen / device. +

+ +

Image srcset

+ +

+ An image should be displayed below at all screen sizes and pixel ratios. +

+ + an image + +

+ Another image should be displayed below at all screen sizes and pixel + ratios. +

+ + an image + +

Picture sources - with srcset pixel ratio

+ +

+ An image should be displayed below at all screen sizes and pixel ratios. +

+ + + + an image + + +

Picture sources - with media queries

+ +

+ An image should be displayed below at all screen sizes and pixel ratios. +

+ + + + + + an image + + + diff --git a/test-website/content/images/image1-1x.png b/test-website/content/images/image1-1x.png new file mode 100644 index 00000000..b3a685e1 Binary files /dev/null and b/test-website/content/images/image1-1x.png differ diff --git a/test-website/content/images/image1-2x.png b/test-website/content/images/image1-2x.png new file mode 100644 index 00000000..fa1534bf Binary files /dev/null and b/test-website/content/images/image1-2x.png differ diff --git a/test-website/content/images/image2-1x.png b/test-website/content/images/image2-1x.png new file mode 100644 index 00000000..b3a685e1 Binary files /dev/null and b/test-website/content/images/image2-1x.png differ diff --git a/test-website/content/images/image2-2x.png b/test-website/content/images/image2-2x.png new file mode 100644 index 00000000..fa1534bf Binary files /dev/null and b/test-website/content/images/image2-2x.png differ diff --git a/test-website/content/images/image2.png b/test-website/content/images/image2.png new file mode 100644 index 00000000..3e7fe67d Binary files /dev/null and b/test-website/content/images/image2.png differ diff --git a/test-website/content/images/image3-high.png b/test-website/content/images/image3-high.png new file mode 100644 index 00000000..2de92303 Binary files /dev/null and b/test-website/content/images/image3-high.png differ diff --git a/test-website/content/images/image3-medium.png b/test-website/content/images/image3-medium.png new file mode 100644 index 00000000..e14e6cc4 Binary files /dev/null and b/test-website/content/images/image3-medium.png differ diff --git a/test-website/content/images/image3-small.png b/test-website/content/images/image3-small.png new file mode 100644 index 00000000..3f0e239c Binary files /dev/null and b/test-website/content/images/image3-small.png differ diff --git a/test-website/content/images/image3.png b/test-website/content/images/image3.png new file mode 100644 index 00000000..3e7fe67d Binary files /dev/null and b/test-website/content/images/image3.png differ diff --git a/test-website/content/images/image4-1.5x.png b/test-website/content/images/image4-1.5x.png new file mode 100644 index 00000000..baf2ecaa Binary files /dev/null and b/test-website/content/images/image4-1.5x.png differ diff --git a/test-website/content/images/image4.png b/test-website/content/images/image4.png new file mode 100644 index 00000000..3e7fe67d Binary files /dev/null and b/test-website/content/images/image4.png differ diff --git a/test-website/content/index.html b/test-website/content/index.html index 17d8add8..74f049ad 100644 --- a/test-website/content/index.html +++ b/test-website/content/index.html @@ -50,6 +50,7 @@
  • Bad redirections
  • Handling of content types
  • Redirect with http-equiv meta directive
  • +
  • Image with srcset
  • diff --git a/tests/test_warc_to_zim.py b/tests/test_warc_to_zim.py index 4bfcf759..8e23392f 100644 --- a/tests/test_warc_to_zim.py +++ b/tests/test_warc_to_zim.py @@ -202,11 +202,15 @@ def verify_warc_and_zim(self, warcfile, zimfile): elif record.rec_type == "response": # We must have a payload assert payload - payload_content = payload.content.tobytes() + payload_content = ( + payload.content.tobytes() # pyright:ignore[reportAttributeAccessIssue] + ) # if HTML, still need to account for the head insert, otherwise should # have exact match - if payload.mimetype.startswith("text/html"): + if payload.mimetype.startswith( # pyright:ignore[reportAttributeAccessIssue] + "text/html" + ): assert head_insert in payload_content elif record.rec_type == "resource": # we do not want to embed resources "as-is"