Skip to content

Commit

Permalink
Merge pull request #1496 from dandi/enh-organize-logging
Browse files Browse the repository at this point in the history
Disallow hdmf 3.14.4, make organize not parallelize for a single file, log information about all exceptions while reading metadata for organize
  • Loading branch information
yarikoptic authored Sep 9, 2024
2 parents 44e097f + 1aa3df8 commit 8e0b1c3
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 15 deletions.
42 changes: 28 additions & 14 deletions dandi/organize.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import os.path as op
from pathlib import Path, PurePosixPath
import re
import traceback
import uuid

import ruamel.yaml
Expand Down Expand Up @@ -841,44 +842,57 @@ def act(func, *args, **kwargs):
# react to those
# Doesn't play nice with Parallel
# with tqdm.tqdm(desc="Files", total=len(paths), unit="file", unit_scale=False) as pbar:
failed = []

def _get_metadata(path):
# Avoid heavy import by importing within function:
from .metadata.nwb import get_metadata

meta, exc = {}, None
try:
meta = get_metadata(path)
except Exception as exc:
meta = {}
failed.append(path)
# pbar.desc = "Files (%d failed)" % len(failed)
lgr.debug("Failed to get metadata for %s: %s", path, exc)
except Exception as e:
exc = (
e.__class__,
str(e),
traceback.TracebackException.from_exception(e),
)
# pbar.update(1)
meta["path"] = path
return meta
return meta, exc

if (
not devel_debug and jobs != 1
not devel_debug and jobs != 1 and not len(paths) == 1
): # Do not use joblib at all if number_of_jobs=1
# Note: It is Python (pynwb) intensive, not IO, so ATM there is little
# to no benefit from Parallel without using multiproc! But that would
# complicate progress bar indication... TODO
metadata = list(
metadata_excs = list(
Parallel(n_jobs=jobs, verbose=10)(
delayed(_get_metadata)(path) for path in paths
)
)
else:
metadata = list(map(_get_metadata, paths))
if failed:
metadata_excs = list(map(_get_metadata, paths))
exceptions = [e for _, e in metadata_excs if e]
if exceptions:
lgr.warning(
"Failed to load metadata for %d out of %d files",
len(failed),
"Failed to load metadata for %d out of %d files "
"due to following types of exceptions: %s. "
"Details of the exceptions will be shown at DEBUG level",
len(exceptions),
len(paths),
", ".join(e[0].__name__ for e in exceptions),
)
for m, e in metadata_excs:
if not e:
continue
lgr.debug(
"Loading metadata for path %s resulted in following exception:\n%s",
m["path"],
"\n".join(e[-1].format()),
)

metadata, skip_invalid = filter_invalid_metadata_rows(metadata)
metadata, skip_invalid = filter_invalid_metadata_rows([m for m, _ in metadata_excs])
if skip_invalid:
msg = (
"%d out of %d files were found not containing all necessary "
Expand Down
3 changes: 2 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ install_requires =
etelemetry >= 0.2.2
fasteners
fscacher >= 0.3.0
hdmf != 3.5.0
# 3.14.4: https://github.com/hdmf-dev/hdmf/issues/1186
hdmf != 3.5.0,!=3.14.4
humanize
interleave ~= 0.1
joblib
Expand Down

0 comments on commit 8e0b1c3

Please sign in to comment.