Skip to content

Commit 7b906e6

Browse files
authored
Merge pull request #1374 from dandi/gh-1373
Double-check file sizes before & after uploading
2 parents cb20d8d + cec2e23 commit 7b906e6

File tree

4 files changed

+157
-79
lines changed

4 files changed

+157
-79
lines changed

dandi/files/bases.py

+75-68
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from dandi.dandiapi import RemoteAsset, RemoteDandiset, RESTFullAPIClient
2828
from dandi.metadata.core import get_default_metadata
2929
from dandi.misctypes import DUMMY_DANDI_ETAG, Digest, LocalReadableFile, P
30-
from dandi.utils import yaml_load
30+
from dandi.utils import post_upload_size_check, pre_upload_size_check, yaml_load
3131
from dandi.validate_types import Scope, Severity, ValidationOrigin, ValidationResult
3232

3333
lgr = dandi.get_logger()
@@ -350,7 +350,7 @@ def iter_upload(
350350
)
351351
yield {"status": "initiating upload"}
352352
lgr.debug("%s: Beginning upload", asset_path)
353-
total_size = self.size
353+
total_size = pre_upload_size_check(self.filepath)
354354
try:
355355
resp = client.post(
356356
"/uploads/initialize/",
@@ -370,73 +370,80 @@ def iter_upload(
370370
else:
371371
raise
372372
else:
373-
upload_id = resp["upload_id"]
374-
parts = resp["parts"]
375-
if len(parts) != etagger.part_qty:
376-
raise RuntimeError(
377-
f"Server and client disagree on number of parts for upload;"
378-
f" server says {len(parts)}, client says {etagger.part_qty}"
379-
)
380-
parts_out = []
381-
bytes_uploaded = 0
382-
lgr.debug("Uploading %s in %d parts", self.filepath, len(parts))
383-
with RESTFullAPIClient("http://nil.nil") as storage:
384-
with self.filepath.open("rb") as fp:
385-
with ThreadPoolExecutor(max_workers=jobs or 5) as executor:
386-
lock = Lock()
387-
futures = [
388-
executor.submit(
389-
_upload_blob_part,
390-
storage_session=storage,
391-
fp=fp,
392-
lock=lock,
393-
etagger=etagger,
394-
asset_path=asset_path,
395-
part=part,
373+
try:
374+
upload_id = resp["upload_id"]
375+
parts = resp["parts"]
376+
if len(parts) != etagger.part_qty:
377+
raise RuntimeError(
378+
f"Server and client disagree on number of parts for upload;"
379+
f" server says {len(parts)}, client says {etagger.part_qty}"
380+
)
381+
parts_out = []
382+
bytes_uploaded = 0
383+
lgr.debug("Uploading %s in %d parts", self.filepath, len(parts))
384+
with RESTFullAPIClient("http://nil.nil") as storage:
385+
with self.filepath.open("rb") as fp:
386+
with ThreadPoolExecutor(max_workers=jobs or 5) as executor:
387+
lock = Lock()
388+
futures = [
389+
executor.submit(
390+
_upload_blob_part,
391+
storage_session=storage,
392+
fp=fp,
393+
lock=lock,
394+
etagger=etagger,
395+
asset_path=asset_path,
396+
part=part,
397+
)
398+
for part in parts
399+
]
400+
for fut in as_completed(futures):
401+
out_part = fut.result()
402+
bytes_uploaded += out_part["size"]
403+
yield {
404+
"status": "uploading",
405+
"upload": 100 * bytes_uploaded / total_size,
406+
"current": bytes_uploaded,
407+
}
408+
parts_out.append(out_part)
409+
lgr.debug("%s: Completing upload", asset_path)
410+
resp = client.post(
411+
f"/uploads/{upload_id}/complete/",
412+
json={"parts": parts_out},
413+
)
414+
lgr.debug(
415+
"%s: Announcing completion to %s",
416+
asset_path,
417+
resp["complete_url"],
418+
)
419+
r = storage.post(
420+
resp["complete_url"], data=resp["body"], json_resp=False
421+
)
422+
lgr.debug(
423+
"%s: Upload completed. Response content: %s",
424+
asset_path,
425+
r.content,
426+
)
427+
rxml = fromstring(r.text)
428+
m = re.match(r"\{.+?\}", rxml.tag)
429+
ns = m.group(0) if m else ""
430+
final_etag = rxml.findtext(f"{ns}ETag")
431+
if final_etag is not None:
432+
final_etag = final_etag.strip('"')
433+
if final_etag != filetag:
434+
raise RuntimeError(
435+
"Server and client disagree on final ETag of"
436+
f" uploaded file; server says {final_etag},"
437+
f" client says {filetag}"
396438
)
397-
for part in parts
398-
]
399-
for fut in as_completed(futures):
400-
out_part = fut.result()
401-
bytes_uploaded += out_part["size"]
402-
yield {
403-
"status": "uploading",
404-
"upload": 100 * bytes_uploaded / total_size,
405-
"current": bytes_uploaded,
406-
}
407-
parts_out.append(out_part)
408-
lgr.debug("%s: Completing upload", asset_path)
409-
resp = client.post(
410-
f"/uploads/{upload_id}/complete/",
411-
json={"parts": parts_out},
412-
)
413-
lgr.debug(
414-
"%s: Announcing completion to %s",
415-
asset_path,
416-
resp["complete_url"],
417-
)
418-
r = storage.post(
419-
resp["complete_url"], data=resp["body"], json_resp=False
420-
)
421-
lgr.debug(
422-
"%s: Upload completed. Response content: %s",
423-
asset_path,
424-
r.content,
425-
)
426-
rxml = fromstring(r.text)
427-
m = re.match(r"\{.+?\}", rxml.tag)
428-
ns = m.group(0) if m else ""
429-
final_etag = rxml.findtext(f"{ns}ETag")
430-
if final_etag is not None:
431-
final_etag = final_etag.strip('"')
432-
if final_etag != filetag:
433-
raise RuntimeError(
434-
"Server and client disagree on final ETag of uploaded file;"
435-
f" server says {final_etag}, client says {filetag}"
436-
)
437-
# else: Error? Warning?
438-
resp = client.post(f"/uploads/{upload_id}/validate/")
439-
blob_id = resp["blob_id"]
439+
# else: Error? Warning?
440+
resp = client.post(f"/uploads/{upload_id}/validate/")
441+
blob_id = resp["blob_id"]
442+
except Exception:
443+
post_upload_size_check(self.filepath, total_size, True)
444+
raise
445+
else:
446+
post_upload_size_check(self.filepath, total_size, False)
440447
lgr.debug("%s: Assigning asset blob to dandiset & version", asset_path)
441448
yield {"status": "producing asset"}
442449
if replacing is not None:

dandi/files/zarr.py

+28-11
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,13 @@
3333
)
3434
from dandi.metadata.core import get_default_metadata
3535
from dandi.misctypes import DUMMY_DANDI_ZARR_CHECKSUM, BasePath, Digest
36-
from dandi.utils import chunked, exclude_from_zarr, pluralize
36+
from dandi.utils import (
37+
chunked,
38+
exclude_from_zarr,
39+
pluralize,
40+
post_upload_size_check,
41+
pre_upload_size_check,
42+
)
3743

3844
from .bases import LocalDirectoryAsset
3945
from ..validate_types import Scope, Severity, ValidationOrigin, ValidationResult
@@ -551,15 +557,21 @@ def mkzarr() -> str:
551557
def _upload_zarr_file(
    storage_session: RESTFullAPIClient, upload_url: str, item: UploadItem
) -> int:
    """Upload a single Zarr entry to *upload_url* and return the bytes sent.

    The file's size is re-checked after the transfer (NFS-flakiness guard):
    if the upload itself failed, a mismatch is only logged so the original
    error propagates; if it succeeded, a mismatch raises.
    """
    try:
        with item.filepath.open("rb") as blob:
            storage_session.put(
                upload_url,
                data=blob,
                json_resp=False,
                retry_if=_retry_zarr_file,
                headers={"Content-MD5": item.base64_digest},
            )
    except Exception:
        # Already failing — don't mask the upload error with a size error.
        post_upload_size_check(item.filepath, item.size, True)
        raise
    # Upload succeeded — a size change now is a hard error.
    post_upload_size_check(item.filepath, item.size, False)
    return item.size
563575

564576

565577
def _retry_zarr_file(r: requests.Response) -> bool:
@@ -634,7 +646,12 @@ class UploadItem:
634646

635647
@classmethod
def from_entry(cls, e: LocalZarrEntry, digest: str) -> UploadItem:
    """Build an :class:`UploadItem` for the local Zarr entry *e*.

    The size is taken via ``pre_upload_size_check`` rather than ``e.size``
    so that a transient zero size from a flaky filesystem is retried.
    """
    checked_size = pre_upload_size_check(e.filepath)
    return cls(
        entry_path=str(e),
        filepath=e.filepath,
        digest=digest,
        size=checked_size,
    )
638655

639656
@property
640657
def base64_digest(self) -> str:

dandi/tests/test_utils.py

+28
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from collections.abc import Iterable
44
import inspect
5+
import logging
56
import os.path as op
67
from pathlib import Path
78
import time
@@ -29,6 +30,7 @@
2930
is_page2_url,
3031
is_same_time,
3132
on_windows,
33+
post_upload_size_check,
3234
under_paths,
3335
)
3436

@@ -561,3 +563,29 @@ def test_under_paths(
561563
paths: list[str], filter_paths: list[str], results: list[str]
562564
) -> None:
563565
assert list(map(str, under_paths(paths, filter_paths))) == results
566+
567+
568+
def test_post_upload_size_check_not_erroring(tmp_path: Path) -> None:
    """With ``erroring=False``, a size mismatch raises RuntimeError."""
    p = tmp_path / "file.txt"
    # Write bytes so the size is the same on Unix and Windows:
    p.write_bytes(b"This is test text.\n")
    expected = f"Size of {p} was 42 at start of upload but is now 19 after upload"
    with pytest.raises(RuntimeError) as excinfo:
        post_upload_size_check(p, 42, False)
    assert str(excinfo.value) == expected
578+
579+
580+
def test_post_upload_size_check_erroring(
    caplog: pytest.LogCaptureFixture, tmp_path: Path
) -> None:
    """With ``erroring=True``, a size mismatch is logged, not raised."""
    p = tmp_path / "file.txt"
    # Write bytes so the size is the same on Unix and Windows:
    p.write_bytes(b"This is test text.\n")
    post_upload_size_check(p, 42, True)
    expected_record = (
        "dandi",
        logging.ERROR,
        f"Size of {p} was 42 at start of upload but is now 19 after upload",
    )
    assert expected_record in caplog.record_tuples

dandi/utils.py

+26
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import shutil
2020
import subprocess
2121
import sys
22+
from time import sleep
2223
import traceback
2324
import types
2425
from typing import IO, Any, List, Optional, Protocol, TypeVar, Union
@@ -834,3 +835,28 @@ def _prepare_path_parts(paths: Iterable[str | PurePath]) -> list[tuple[str, ...]
834835

835836
def _starts_with(t: tuple[str, ...], prefix: tuple[str, ...]) -> bool:
836837
return t[: len(prefix)] == prefix
838+
839+
840+
def pre_upload_size_check(path: Path) -> int:
    """Return the on-disk size of *path*, retrying while it reads as zero.

    If the filesystem reports a size of zero for a file we're about to
    upload, double-check the size in case we're on a flaky NFS system:
    re-stat up to 19 more times, 0.1 s apart, before accepting zero.
    """
    size = path.stat().st_size
    for _ in range(19):
        if size != 0:
            break
        sleep(0.1)
        size = path.stat().st_size
    return size
849+
850+
851+
def post_upload_size_check(path: Path, pre_check_size: int, erroring: bool) -> None:
    """Verify that *path* still has the size it had before uploading.

    More checks for NFS flakiness: a mismatch raises ``RuntimeError``,
    unless ``erroring`` is true (an upload failure is already propagating),
    in which case it is only logged so the original error isn't masked.
    """
    current_size = path.stat().st_size
    if current_size == pre_check_size:
        return
    msg = (
        f"Size of {path} was {pre_check_size} at start of upload but is"
        f" now {current_size} after upload"
    )
    if erroring:
        lgr.error(msg)
    else:
        raise RuntimeError(msg)

0 commit comments

Comments
 (0)