Skip to content

Commit d177243

Browse files
authored
Merge pull request #191 from Gallaecio/request-headers-from-bytes
Add from_bytes_dict to HttpRequestHeaders
2 parents d45aa1e + 1a43e0c commit d177243

File tree

3 files changed

+64
-51
lines changed

tests/test_page_inputs.py

+19-5
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,14 @@ def test_http_request_init_with_response_url() -> None:
210210
assert str(req.url) == str(resp.url)
211211

212212

213-
def test_http_response_headers_from_bytes_dict() -> None:
213+
@pytest.mark.parametrize(
214+
"cls",
215+
(
216+
HttpRequestHeaders,
217+
HttpResponseHeaders,
218+
),
219+
)
220+
def test_http_headers_from_bytes_dict(cls) -> None:
214221
raw_headers = {
215222
b"Content-Length": [b"316"],
216223
b"Content-Encoding": [b"gzip", b"br"],
@@ -219,7 +226,7 @@ def test_http_response_headers_from_bytes_dict() -> None:
219226
"X-missing": None,
220227
"X-tuple": (b"x", "y"),
221228
}
222-
headers = HttpResponseHeaders.from_bytes_dict(raw_headers)
229+
headers = cls.from_bytes_dict(raw_headers)
223230

224231
assert headers.get("content-length") == "316"
225232
assert headers.get("content-encoding") == "gzip"
@@ -231,12 +238,19 @@ def test_http_response_headers_from_bytes_dict() -> None:
231238
assert headers.getall("x-tuple") == ["x", "y"]
232239

233240

234-
def test_http_response_headers_from_bytes_dict_err() -> None:
241+
@pytest.mark.parametrize(
242+
"cls",
243+
(
244+
HttpRequestHeaders,
245+
HttpResponseHeaders,
246+
),
247+
)
248+
def test_http_response_headers_from_bytes_dict_err(cls) -> None:
235249
with pytest.raises(ValueError):
236-
HttpResponseHeaders.from_bytes_dict({b"Content-Length": [316]})
250+
cls.from_bytes_dict({b"Content-Length": [316]})
237251

238252
with pytest.raises(ValueError):
239-
HttpResponseHeaders.from_bytes_dict({b"Content-Length": 316})
253+
cls.from_bytes_dict({b"Content-Length": 316})
240254

241255

242256
def test_http_response_headers_init_requests() -> None:

web_poet/_base.py

+43-1
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,11 @@
33
In general, users shouldn't import and use the contents of this module.
44
"""
55

6-
from typing import Dict, List, Type, TypeVar
6+
from typing import AnyStr, Dict, List, Tuple, Type, TypeVar, Union
77

88
from multidict import CIMultiDict
99

10+
_AnyStrDict = Dict[AnyStr, Union[AnyStr, List[AnyStr], Tuple[AnyStr, ...]]]
1011
T_headers = TypeVar("T_headers", bound="_HttpHeaders")
1112

1213

@@ -31,3 +32,44 @@ def from_name_value_pairs(cls: Type[T_headers], arg: List[Dict]) -> T_headers:
3132
<_HttpHeaders('Content-Encoding': 'gzip', 'content-length': '648')>
3233
"""
3334
return cls([(pair["name"], pair["value"]) for pair in arg])
35+
36+
@classmethod
37+
def from_bytes_dict(
38+
cls: Type[T_headers], arg: _AnyStrDict, encoding: str = "utf-8"
39+
) -> T_headers:
40+
"""An alternative constructor for instantiation where the header-value
41+
pairs could be in raw bytes form.
42+
43+
This supports multiple header values in the form of ``List[bytes]`` and
44+
``Tuple[bytes, ...]`` alongside a plain ``bytes`` value. A value in ``str``
45+
also works and wouldn't break the decoding process at all.
46+
47+
By default, it converts the ``bytes`` value using "utf-8". However, this
48+
can easily be overridden using the ``encoding`` parameter.
49+
50+
>>> raw_values = {
51+
... b"Content-Encoding": [b"gzip", b"br"],
52+
... b"Content-Type": [b"text/html"],
53+
... b"content-length": b"648",
54+
... }
55+
>>> headers = _HttpHeaders.from_bytes_dict(raw_values)
56+
>>> headers
57+
<_HttpHeaders('Content-Encoding': 'gzip', 'Content-Encoding': 'br', 'Content-Type': 'text/html', 'content-length': '648')>
58+
"""
59+
60+
def _norm(data):
61+
if isinstance(data, str) or data is None:
62+
return data
63+
elif isinstance(data, bytes):
64+
return data.decode(encoding)
65+
raise ValueError(f"Expecting str or bytes. Received {type(data)}")
66+
67+
converted = []
68+
69+
for header, value in arg.items():
70+
if isinstance(value, list) or isinstance(value, tuple):
71+
converted.extend([(_norm(header), _norm(v)) for v in value])
72+
else:
73+
converted.append((_norm(header), _norm(value)))
74+
75+
return cls(converted)

web_poet/page_inputs/http.py

+2-45
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import json
22
from hashlib import sha1
3-
from typing import Any, AnyStr, Dict, List, Optional, Tuple, Type, TypeVar, Union
3+
from typing import Any, Optional, TypeVar, Union
44
from urllib.parse import urljoin
55

66
import attrs
@@ -20,9 +20,7 @@
2020
from .url import RequestUrl as _RequestUrl
2121
from .url import ResponseUrl as _ResponseUrl
2222

23-
T_headers = TypeVar("T_headers", bound="HttpResponseHeaders")
24-
25-
_AnyStrDict = Dict[AnyStr, Union[AnyStr, List[AnyStr], Tuple[AnyStr, ...]]]
23+
T_headers = TypeVar("T_headers", bound=_HttpHeaders)
2624

2725

2826
RequestUrl = _create_deprecated_class("RequestUrl", _RequestUrl)
@@ -113,47 +111,6 @@ class HttpResponseHeaders(_HttpHeaders):
113111
the API spec of :class:`multidict.CIMultiDict`.
114112
"""
115113

116-
@classmethod
117-
def from_bytes_dict(
118-
cls: Type[T_headers], arg: _AnyStrDict, encoding: str = "utf-8"
119-
) -> T_headers:
120-
"""An alternative constructor for instantiation where the header-value
121-
pairs could be in raw bytes form.
122-
123-
This supports multiple header values in the form of ``List[bytes]`` and
124-
``Tuple[bytes, ...]`` alongside a plain ``bytes`` value. A value in ``str``
125-
also works and wouldn't break the decoding process at all.
126-
127-
By default, it converts the ``bytes`` value using "utf-8". However, this
128-
can easily be overridden using the ``encoding`` parameter.
129-
130-
>>> raw_values = {
131-
... b"Content-Encoding": [b"gzip", b"br"],
132-
... b"Content-Type": [b"text/html"],
133-
... b"content-length": b"648",
134-
... }
135-
>>> headers = HttpResponseHeaders.from_bytes_dict(raw_values)
136-
>>> headers
137-
<HttpResponseHeaders('Content-Encoding': 'gzip', 'Content-Encoding': 'br', 'Content-Type': 'text/html', 'content-length': '648')>
138-
"""
139-
140-
def _norm(data):
141-
if isinstance(data, str) or data is None:
142-
return data
143-
elif isinstance(data, bytes):
144-
return data.decode(encoding)
145-
raise ValueError(f"Expecting str or bytes. Received {type(data)}")
146-
147-
converted = []
148-
149-
for header, value in arg.items():
150-
if isinstance(value, list) or isinstance(value, tuple):
151-
converted.extend([(_norm(header), _norm(v)) for v in value])
152-
else:
153-
converted.append((_norm(header), _norm(value)))
154-
155-
return cls(converted)
156-
157114
def declared_encoding(self) -> Optional[str]:
158115
"""Return encoding detected from the Content-Type header, or None
159116
if encoding is not found"""

0 commit comments

Comments
 (0)