fix: removed download_file (#121)
## Changes

- removed `download_file`
- refactored the rest of the code to use `download_files` (a minimal migration sketch follows)
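For reference, a minimal migration sketch based on the call-site changes in this diff. The import path `deeporigin.data_hub.api`, the file ID, and the destination path are assumptions used for illustration only:

```python
# Migration sketch: download_file -> download_files.
# The import path and the placeholder file ID/path below are hypothetical.
from deeporigin.data_hub import api

# Before (removed in this commit):
# api.download_file(file_id="_file:abc123", destination="/tmp/downloads")

# After: download_files takes a list of file IDs and a save_to_dir keyword.
api.download_files(
    file_ids=["_file:abc123"],
    save_to_dir="/tmp/downloads",
)
```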
sg-s authored Nov 26, 2024
1 parent f0ca4ae commit e79b204
Showing 12 changed files with 536 additions and 264 deletions.
99 changes: 42 additions & 57 deletions src/data_hub/api.py
@@ -239,48 +239,6 @@ def list_rows(
)


@beartype
@ensure_client
def download_file(
file_id: str,
*,
destination: str | Path = os.getcwd(),
client=None,
_stash: bool = False,
) -> None:
"""Download a file to a destination folder (workspace).
Download a file synchronously from Deep Origin
to folder on the local file system.
Args:
file_id: ID of the file on Deep Origin
destination: Path to the destination folder
"""

if not os.path.isdir(destination):
raise DeepOriginException(
message=f"Destination `{destination}` should be a path for a folder."
)

file_name = _api.describe_file(
file_id=file_id,
client=client,
_stash=_stash,
).name

url = _api.create_file_download_url(
file_id=file_id,
client=client,
_stash=_stash,
).downloadUrl

save_path = os.path.join(destination, file_name)

download_sync(url, save_path)


@beartype
@ensure_client
def upload_file(
@@ -929,9 +887,9 @@ def download(
if PREFIXES.FILE in source:
# this is a file

download_file(
file_id=source,
destination=destination,
download_files(
file_ids=[source],
save_to_dir=destination,
client=client,
_stash=_stash,
)
@@ -1002,10 +960,11 @@ def download_database(

# now download all files in the database
if include_files:
file_ids = df.attrs["file_ids"]

for file_id in file_ids:
download_file(file_id, destination, client=client)
download_files(
file_ids=df.attrs["file_ids"],
save_to_dir=destination,
client=client,
)

df.to_csv(os.path.join(destination, database_hid + ".csv"))

@@ -1084,6 +1043,7 @@ def get_dataframe(
df = _make_deeporigin_dataframe(
data=data,
reference_ids=None,
file_ids=None,
db_row=db_row,
rows=None,
columns=None,
@@ -1169,6 +1129,7 @@ def get_dataframe(
df = _make_deeporigin_dataframe(
data=data,
reference_ids=reference_ids,
file_ids=file_ids,
db_row=db_row,
rows=rows,
columns=columns,
@@ -1192,6 +1153,7 @@ def _make_deeporigin_dataframe(
*,
data: dict,
reference_ids: Optional[list],
file_ids: Optional[list],
db_row: dict,
columns: Optional[list],
rows: Optional[list],
@@ -1203,6 +1165,12 @@ def _make_deeporigin_dataframe(
df = DataFrame(data)
if reference_ids is not None:
df.attrs["reference_ids"] = list(set(reference_ids))
df.attrs["reference_ids"].sort()

if file_ids is not None:
df.attrs["file_ids"] = list(set(file_ids))
df.attrs["file_ids"].sort()

df.attrs["id"] = db_row.id
df.attrs["metadata"] = dict(db_row)

@@ -1223,8 +1191,9 @@ def _make_deeporigin_dataframe(
@beartype
@ensure_client
def download_files(
files: Optional[list | dict] = None,
*,
files: Optional[list[dict]] = None,
file_ids: Optional[list[str]] = None,
save_to_dir: Path | str = Path("."),
use_file_names: bool = True,
client=None,
@@ -1233,21 +1202,37 @@
"""download multiple files in parallel to local disk
Args:
files: list of files to download. These can be of type `types.list_files_response.Data` (as returned by api.list_files) or can be a list of strings of file IDs.
files: list of files to download. These can be a list of file_ids or a list of files as returned by api.list_files
save_to_dir: directory to save files to on local computer
use_file_names: If `True`, refer to files by name rather than ID.
"""

if files is None:
files = list_files(client=client)
if not os.path.isdir(save_to_dir):
raise DeepOriginException(
message=f"Destination `{save_to_dir}` should be a path for a folder."
)

if files is None and file_ids is None:
# nothing provided, download everything
files = list_files(client=client, _stash=_stash)
elif files is not None and file_ids is None:
# list of files provided
pass
elif files is None and file_ids is not None:
# list of file IDs provided
files = list_files(
file_ids=file_ids,
client=client,
_stash=_stash,
)

if isinstance(files, dict):
files = [files]
else:
raise DeepOriginException("Only one of `files` or `file_ids` can be provided")

if isinstance(save_to_dir, str):
save_to_dir = Path(save_to_dir)

if isinstance(files[0], dict):
file_ids = [item.file.id for item in files]
file_ids = [item.file.id for item in files]

if use_file_names:
save_paths = [save_to_dir / item.file.name for item in files]
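Based on the refactored signature shown above, here is a usage sketch of the two mutually exclusive ways to call `download_files`. The import path and the example file ID are assumptions, not taken from this diff:

```python
# Usage sketch for the refactored download_files.
# Only one of `files` or `file_ids` may be passed; omitting both downloads
# every listed file. Import path and file ID below are hypothetical.
from pathlib import Path

from deeporigin.data_hub import api

# 1) By explicit file IDs (resolved internally via list_files):
api.download_files(
    file_ids=["_file:abc123"],        # hypothetical file ID
    save_to_dir=Path("./downloads"),
)

# 2) By file listings returned by api.list_files:
files = api.list_files()
api.download_files(
    files=files,
    save_to_dir=Path("./downloads"),
    use_file_names=True,              # save under original file names
)

# Passing both `files` and `file_ids` raises DeepOriginException.
```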
16 changes: 15 additions & 1 deletion tests/fixtures/responses/convert_id_format.json
@@ -4,16 +4,30 @@
"id": "_row:cbOepriiytSaPfpqetT4C",
"hid": "dfsd-306"
},
{
"id": "_row:rakyaiwKrKsS4JT9IJsXn",
"hid": "exp-1"
},
{
"id": "_row:ku2ZdpZ0VkYgOj1Y37rSi",
"hid": "dna-8"
}
],
"{\"conversions\": [{\"id\": \"_row:cbOepriiytSaPfpqetT4C\"}, {\"id\": \"_row:ku2ZdpZ0VkYgOj1Y37rSi\"}, {\"id\": \"_row:rakyaiwKrKsS4JT9IJsXn\"}]}": [
{
"id": "_row:cbOepriiytSaPfpqetT4C",
"hid": "dfsd-306"
},
{
"id": "_row:rakyaiwKrKsS4JT9IJsXn",
"hid": "exp-1"
},
{
"id": "_row:ku2ZdpZ0VkYgOj1Y37rSi",
"hid": "dna-8"
}
],
"{\"conversions\": [{\"id\": \"_row:cbOepriiytSaPfpqetT4C\"}, {\"id\": \"_row:ku2ZdpZ0VkYgOj1Y37rSi\"}, {\"id\": \"_row:rakyaiwKrKsS4JT9IJsXn\"}]}": [
"{\"conversions\": [{\"id\": \"_row:cbOepriiytSaPfpqetT4C\"}, {\"id\": \"_row:rakyaiwKrKsS4JT9IJsXn\"}, {\"id\": \"_row:ku2ZdpZ0VkYgOj1Y37rSi\"}]}": [
{
"id": "_row:cbOepriiytSaPfpqetT4C",
"hid": "dfsd-306"
6 changes: 3 additions & 3 deletions tests/fixtures/responses/create_database.json
@@ -1,11 +1,11 @@
{
"{\"database\": {\"hid\": \"tc-4Qzkrn57rM-db\", \"hidPrefix\": \"tc-4Qzkrn57rM-db\", \"name\": \"tc-4Qzkrn57rM-db\", \"parentId\": \"registry\"}}": {
"id": "_database:zN9taqV6WxLzjWxKtxwce",
"id": "_database:LanvJgEqhfNVHtgeZgsnT",
"type": "database",
"hid": "tc-4Qzkrn57rM-db",
"name": "tc-4Qzkrn57rM-db",
"dateCreated": "2024-11-19 03:19:28.539577",
"dateUpdated": "2024-11-19 03:19:28.539577",
"dateCreated": "2024-11-25 19:08:42.989402",
"dateUpdated": "2024-11-25 19:08:42.989402",
"createdByUserDrn": "<redacted>",
"parentId": "_workspace:IYTHdIjtuSvnOekwQjUlN",
"hidPrefix": "tc-4Qzkrn57rM-db",
15 changes: 15 additions & 0 deletions tests/fixtures/responses/create_file_upload.json
@@ -43,5 +43,20 @@
"dateUpdated": "2024-11-19T03:19:28.397Z",
"createdByUserDrn": "<redacted>"
}
},
"{\"checksum_sha256\": \"rRS/5tR0d/MjICI1AdOr7CAyl1JIvgc79n3isXNAf9g=\", \"content_length\": \"10682\", \"content_type\": \"text/x-python\", \"name\": \"test_data_hub.py\"}": {
"uploadUrl": "<redacted>",
"file": {
"name": "test_data_hub.py",
"contentType": "text/x-python",
"contentLength": 10682,
"checksumSha256": "rRS/5tR0d/MjICI1AdOr7CAyl1JIvgc79n3isXNAf9g=",
"id": "_file:ZKPAZZ8H8eKtrXRwZ9CON",
"status": "ready",
"uri": "<redacted>",
"dateCreated": "2024-11-25T19:08:43.221Z",
"dateUpdated": "2024-11-25T19:08:43.221Z",
"createdByUserDrn": "<redacted>"
}
}
}
6 changes: 3 additions & 3 deletions tests/fixtures/responses/create_workspace.json
@@ -1,11 +1,11 @@
{
"{\"workspace\": {\"hid\": \"tc-4Qzkrn57rM-ws\", \"name\": \"tc-4Qzkrn57rM-ws\", \"parentId\": null}}": {
"id": "_workspace:WGKwIfOV6ySsJMkbtvRvC",
"id": "_workspace:MptBTTL9qUbbSbUiAydsN",
"type": "workspace",
"hid": "tc-4Qzkrn57rM-ws",
"name": "tc-4Qzkrn57rM-ws",
"dateCreated": "2024-11-19 03:19:28.75955",
"dateUpdated": "2024-11-19 03:19:28.75955",
"dateCreated": "2024-11-25 19:08:43.22463",
"dateUpdated": "2024-11-25 19:08:43.22463",
"createdByUserDrn": "<redacted>"
}
}
2 changes: 1 addition & 1 deletion tests/fixtures/responses/describe_database_stats.json
@@ -1,5 +1,5 @@
{
"{\"database_id\": \"kitchen-sink\"}": {
"rowCount": 127
"rowCount": 0
}
}
2 changes: 2 additions & 0 deletions tests/fixtures/responses/describe_row.json
@@ -329,6 +329,7 @@
]
},
"{\"fields\": true, \"row_id\": \"dfsd-306\"}": {
"parentHid": "ligands",
"type": "row",
"id": "_row:cbOepriiytSaPfpqetT4C",
"hid": "dfsd-306",
@@ -729,6 +730,7 @@
]
},
"{\"fields\": false, \"row_id\": \"dfsd-306\"}": {
"parentHid": "ligands",
"type": "row",
"id": "_row:cbOepriiytSaPfpqetT4C",
"hid": "dfsd-306",
9 changes: 5 additions & 4 deletions tests/fixtures/responses/ensure_rows.json
@@ -2,12 +2,13 @@
"{\"database_id\": \"kitchen-sink\", \"rows\": [{\"row\": {}}]}": {
"rows": [
{
"parentHid": "kitchen-sink",
"type": "row",
"id": "_row:OIy39TGcV7jqLbCHsdhTO",
"hid": "ks-136",
"id": "_row:_database:swAKhqQzxqTB7cm2FeVcW:iqZurZ4W4UzDwpRgo5Vuj",
"hid": "ks-155",
"parentId": "_database:swAKhqQzxqTB7cm2FeVcW",
"dateCreated": "2024-11-19 03:19:29.007535",
"dateUpdated": "2024-11-19 03:19:29.007535",
"dateCreated": "2024-11-25 19:08:43.711226",
"dateUpdated": "2024-11-25 19:08:43.711226",
"createdByUserDrn": "<redacted>",
"validationStatus": "valid"
}
