Skip to content

Commit e9a6ecf

Browse files
committed
use yarl underneath ResponseURL and RequestURL
1 parent b2c665e commit e9a6ecf

File tree

6 files changed

+59
-3
lines changed

6 files changed

+59
-3
lines changed

setup.py

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
'url-matcher',
2626
'multidict',
2727
'w3lib >= 1.22.0',
28+
'yarl',
2829
],
2930
classifiers=[
3031
'Development Status :: 2 - Pre-Alpha',

tests/test_page_inputs.py

+18
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import requests
66

77
from web_poet.page_inputs import (
8+
ResponseURL,
9+
RequestURL,
810
HttpRequest,
911
HttpResponse,
1012
HttpRequestBody,
@@ -14,6 +16,22 @@
1416
)
1517

1618

19+
@pytest.mark.parametrize("cls", [ResponseURL, RequestURL])
def test_url(cls):
    """Check string round-tripping and component properties of the URL classes."""
    url_value = "https://example.com/category/product?query=123&id=xyz#frag1"

    url = cls(url_value)

    assert str(url) == url_value
    assert url.scheme == "https"
    assert url.host == "example.com"
    assert url.path == "/category/product"
    assert url.query_string == "query=123&id=xyz"
    assert url.fragment == "frag1"

    # Building from an existing instance must yield the same URL; previously
    # the result was assigned but never checked.
    new_url = cls(url)
    assert str(new_url) == url_value
33+
34+
1735
@pytest.mark.parametrize("body_cls", [HttpRequestBody, HttpResponseBody])
1836
def test_http_body_hashable(body_cls):
1937
http_body = body_cls(b"content")

web_poet/.overrides.py.swp

16 KB
Binary file not shown.

web_poet/mixins.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def base_url(self) -> str:
4242
# FIXME: move it to HttpResponse
4343
if self._cached_base_url is None:
4444
text = self.html[:4096]
45-
self._cached_base_url = get_base_url(text, self.url)
45+
self._cached_base_url = get_base_url(text, str(self.url))
4646
return self._cached_base_url
4747

4848
def urljoin(self, url: str) -> str:

web_poet/page_inputs/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from .meta import Meta
22
from .client import HttpClient
33
from .http import (
4+
ResponseURL,
5+
RequestURL,
46
HttpRequest,
57
HttpResponse,
68
HttpRequestHeaders,

web_poet/page_inputs/http.py

+37-2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
http_content_type_encoding
1111
)
1212

13+
import yarl
1314
from web_poet._base import _HttpHeaders
1415
from web_poet.utils import memoizemethod_noargs
1516

@@ -18,12 +19,46 @@
1819
_AnyStrDict = Dict[AnyStr, Union[AnyStr, List[AnyStr], Tuple[AnyStr, ...]]]
1920

2021

21-
class ResponseURL(str):
22+
class _URL:
    """Thin wrapper that exposes selected parts of a URL through ``yarl.URL``.

    The constructor argument is converted with ``str()`` and parsed by
    ``yarl.URL``; the parsed object is stored in ``self._url`` and every
    property below simply delegates to it.
    """

    def __init__(self, url: Union[str, yarl.URL, "_URL"]):
        # ``str(url)`` allows a plain string, a ``yarl.URL`` or another
        # ``_URL`` instance (whose ``__str__`` returns its URL text).
        self._url = yarl.URL(str(url))

    def __str__(self) -> str:
        """Return the URL as text, as rendered by ``yarl.URL``."""
        return str(self._url)

    def __repr__(self) -> str:
        return str(self._url)

    def __eq__(self, other) -> bool:
        # Equality is purely textual: ``other`` is stringified, so a plain
        # ``str`` holding the same URL compares equal to this object.
        return str(self._url) == str(other)

    def __hash__(self) -> int:
        # Defining ``__eq__`` without ``__hash__`` sets ``__hash__`` to None
        # and makes instances unusable as dict keys or set members.  Hashing
        # the same text that ``__eq__`` compares keeps the two consistent,
        # including with an equal plain ``str``.
        return hash(str(self._url))

    @property
    def scheme(self) -> str:
        """Scheme part of the URL, e.g. ``"https"``."""
        return self._url.scheme

    @property
    def host(self) -> Union[str, None]:
        """Host part of the URL; yarl reports ``None`` for relative URLs."""
        return self._url.host

    @property
    def path(self) -> str:
        """Path part of the URL, e.g. ``"/category/product"``."""
        return self._url.path

    @property
    def query_string(self) -> str:
        """Query part of the URL without the leading ``?``."""
        return self._url.query_string

    @property
    def fragment(self) -> str:
        """Fragment part of the URL without the leading ``#``."""
        return self._url.fragment
54+
55+
56+
class ResponseURL(_URL):
    """URL of the response."""
2459

2560

26-
class RequestURL(str):
61+
class RequestURL(_URL):
    """URL of the request."""
2964

0 commit comments

Comments
 (0)