Skip to content

Commit ee18a15

Browse files
authored
Merge pull request #1415 from dandi/gh-1380
Use `yarl` to clean up some code
2 parents 4a1f7a7 + e58df62 commit ee18a15

File tree

5 files changed

+45
-22
lines changed

5 files changed

+45
-22
lines changed

dandi/dandiapi.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,13 @@
1414
from time import sleep, time
1515
from types import TracebackType
1616
from typing import TYPE_CHECKING, Any, Dict, List, Optional
17-
from urllib.parse import quote_plus, urlparse, urlunparse
1817

1918
import click
2019
from dandischema import models
2120
from pydantic import BaseModel, Field, PrivateAttr
2221
import requests
2322
import tenacity
23+
from yarl import URL
2424

2525
from . import get_logger
2626
from .consts import (
@@ -1521,7 +1521,7 @@ def get_content_url(
15211521
else:
15221522
raise # reraise since we need to figure out how to handle such a case
15231523
if strip_query:
1524-
url = urlunparse(urlparse(url)._replace(query=""))
1524+
url = str(URL(url).with_query(None))
15251525
return url
15261526

15271527
def get_download_file_iter(
@@ -1970,9 +1970,10 @@ def get_download_file_iter(
19701970
Returns a function that when called (optionally with an offset into the
19711971
file to start downloading at) returns a generator of chunks of the file
19721972
"""
1973-
prefix = quote_plus(str(self))
1974-
url = self.client.get_url(
1975-
f"/zarr/{self.zarr_id}/files?prefix={prefix}&download=true"
1973+
url = str(
1974+
URL(self.client.get_url(f"/zarr/{self.zarr_id}/files/")).with_query(
1975+
{"prefix": str(self), "download": "true"}
1976+
)
19761977
)
19771978

19781979
def downloader(start_at: int = 0) -> Iterator[bytes]:

dandi/delete.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from pathlib import Path
77

88
import click
9+
from yarl import URL
910

1011
from .consts import DRAFT, ZARR_EXTENSIONS, DandiInstance, dandiset_metadata_file
1112
from .dandiapi import DandiAPIClient, RemoteAsset, RemoteDandiset
@@ -246,5 +247,8 @@ def find_local_asset(filepath: str) -> tuple[str, str]:
246247

247248

248249
def is_same_url(url1: str, url2: str) -> bool:
249-
# TODO: Use a real URL library like furl, purl, or yarl
250-
return url1.rstrip("/") == url2.rstrip("/")
250+
u1 = URL(url1)
251+
u1 = u1.with_path(u1.path.rstrip("/"))
252+
u2 = URL(url2)
253+
u2 = u2.with_path(u2.path.rstrip("/"))
254+
return u1 == u2

dandi/tests/test_delete.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from .fixtures import DandiAPI, SampleDandiset
99
from ..consts import DRAFT, dandiset_metadata_file
1010
from ..dandiapi import RESTFullAPIClient
11-
from ..delete import delete
11+
from ..delete import delete, is_same_url
1212
from ..download import download
1313
from ..exceptions import NotFoundError
1414
from ..utils import list_paths
@@ -439,3 +439,16 @@ def test_delete_zarr_path(
439439
assert list_paths(tmp_path) == [
440440
tmp_path / zarr_dandiset.dandiset_id / "dandiset.yaml"
441441
]
442+
443+
444+
@pytest.mark.parametrize(
445+
"url1,url2,r",
446+
[
447+
("https://example.com/api", "https://example.com/api/", True),
448+
("https://example.com/api", "http://example.com/api", False),
449+
("https://example.com/api", "HTTPS://EXAMPLE.COM/api", True),
450+
("https://example.珠宝/api", "https://example.xn--pbt977c/api", True),
451+
],
452+
)
453+
def test_is_same_url(url1: str, url2: str, r: bool) -> None:
454+
assert is_same_url(url1, url2) is r

dandi/utils.py

+18-14
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,14 @@
2323
import traceback
2424
import types
2525
from typing import IO, Any, List, Optional, Protocol, TypeVar, Union
26-
from urllib.parse import parse_qs, urlparse, urlunparse
2726

2827
import dateutil.parser
28+
from multidict import MultiDict # dependency of yarl
2929
from pydantic import BaseModel, Field
3030
import requests
3131
import ruamel.yaml
3232
from semantic_version import Version
33+
from yarl import URL
3334

3435
from . import __version__, get_logger
3536
from .consts import DandiInstance, known_instances, known_instances_rev
@@ -567,8 +568,9 @@ def get_instance(dandi_instance_id: str | DandiInstance) -> DandiInstance:
567568
else:
568569
instance = None
569570
is_api = False
570-
bits = urlparse(redirector_url)
571-
redirector_url = urlunparse((bits[0], bits[1], "", "", "", ""))
571+
redirector_url = str(
572+
URL(redirector_url).with_path("").with_query(None).with_fragment(None)
573+
)
572574
else:
573575
dandi_id = dandi_instance_id
574576
instance = known_instances[dandi_id]
@@ -619,13 +621,13 @@ def _get_instance(
619621
if dandi_id is None:
620622
# Don't use pydantic.AnyHttpUrl, as that sets the `port` attribute even
621623
# if it's not present in the string.
622-
bits = urlparse(api_url)
623-
if bits.hostname is not None:
624-
dandi_id = bits.hostname
625-
if bits.port is not None:
624+
u = URL(api_url)
625+
if u.host is not None:
626+
dandi_id = u.host
627+
if (port := u.explicit_port) is not None:
626628
if ":" in dandi_id:
627629
dandi_id = f"[{dandi_id}]"
628-
dandi_id += f":{bits.port}"
630+
dandi_id += f":{port}"
629631
else:
630632
dandi_id = api_url
631633
return DandiInstance(
@@ -790,12 +792,14 @@ def is_page2_url(page1: str, page2: str) -> bool:
790792
Tests whether the URL ``page2`` is the same as ``page1`` but with the
791793
``page`` query parameter set to ``2``
792794
"""
793-
bits1 = urlparse(page1)
794-
params1 = parse_qs(bits1.query)
795-
params1["page"] = ["2"]
796-
bits2 = urlparse(page2)
797-
params2 = parse_qs(bits2.query)
798-
return (bits1[:3], params1, bits1.fragment) == (bits2[:3], params2, bits2.fragment)
795+
url1 = URL(page1)
796+
params1 = MultiDict(url1.query)
797+
params1["page"] = "2"
798+
url1 = url1.with_query(None)
799+
url2 = URL(page2)
800+
params2 = url2.query
801+
url2 = url2.with_query(None)
802+
return (url1, sorted(params1.items())) == (url2, sorted(params2.items()))
799803

800804

801805
def exclude_from_zarr(path: Path) -> bool:

setup.cfg

+1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ install_requires =
5555
ruamel.yaml >=0.15, <1
5656
semantic-version
5757
tenacity
58+
yarl ~= 1.9
5859
zarr ~= 2.10
5960
zarr_checksum ~= 0.4.0
6061
zip_safe = False

0 commit comments

Comments
 (0)