Skip to content

Commit

Permalink
Merge pull request #1104 from dandi/gh-943
Browse files Browse the repository at this point in the history
Structured validation results
  • Loading branch information
yarikoptic authored Oct 28, 2022
2 parents 1e43339 + 6205109 commit 48a222f
Show file tree
Hide file tree
Showing 13 changed files with 773 additions and 336 deletions.
77 changes: 0 additions & 77 deletions dandi/bids_utils.py

This file was deleted.

194 changes: 138 additions & 56 deletions dandi/cli/cmd_validate.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
import logging
import os
from typing import List, cast

import click

from .base import devel_debug_option, devel_option, lgr, map_to_click_exceptions
from .base import devel_debug_option, devel_option, map_to_click_exceptions
from ..utils import pluralize
from ..validate_types import Severity


@click.command()
@click.option(
"--schema", help="Validate against new BIDS schema version", metavar="VERSION"
"--schema", help="Validate against new BIDS schema version.", metavar="VERSION"
)
@click.option(
"--report-path",
Expand All @@ -20,13 +23,21 @@
is_flag=True,
help="Whether to write a report under a unique path in the DANDI log directory.",
)
@click.option(
"--grouping",
"-g",
help="How to group error/warning reporting.",
type=click.Choice(["none", "path"], case_sensitive=False),
default="none",
)
@click.argument("paths", nargs=-1, type=click.Path(exists=True, dir_okay=True))
@map_to_click_exceptions
def validate_bids(
paths,
schema,
report,
report_path,
grouping="none",
):
"""Validate BIDS paths.
Expand All @@ -36,20 +47,16 @@ def validate_bids(
dandi validate-bids /my/path
"""

from ..bids_utils import is_valid, report_errors
from ..validate import validate_bids as validate_bids_

validator_result = validate_bids_(
validator_result = validate_bids_( # Controller
*paths,
report=report,
report_path=report_path,
schema_version=schema,
)
valid = is_valid(validator_result)
report_errors(validator_result)

if not valid:
raise SystemExit(1)
_process_issues(validator_result, grouping)


@click.command()
Expand All @@ -60,13 +67,32 @@ def validate_bids(
default=False,
is_flag=True,
)
@click.option(
"--grouping",
"-g",
help="How to group error/warning reporting.",
type=click.Choice(["none", "path"], case_sensitive=False),
default="none",
)
@click.argument("paths", nargs=-1, type=click.Path(exists=True, dir_okay=True))
@devel_debug_option()
@map_to_click_exceptions
def validate(paths, schema=None, devel_debug=False, allow_any_path=False):
def validate(
paths,
schema=None,
devel_debug=False,
allow_any_path=False,
grouping="none",
):
"""Validate files for NWB and DANDI compliance.
Exits with non-0 exit code if any file is not compliant.
Parameters
----------
grouping : str, optional
A string which is either "", "error", or "path" — "error" not yet implemented.
"""
from ..pynwb_utils import ignore_benign_pynwb_warnings
from ..validate import validate as validate_
Expand All @@ -84,61 +110,117 @@ def validate(paths, schema=None, devel_debug=False, allow_any_path=False):
# at this point, so we ignore it although ideally there should be a formal
# way to get relevant warnings (not errors) from PyNWB
ignore_benign_pynwb_warnings()
view = "one-at-a-time" # TODO: rename, add groupped

all_files_errors = {}
nfiles = 0
for path, errors in validate_(
validator_result = validate_(
*paths,
schema_version=schema,
devel_debug=devel_debug,
allow_any_path=allow_any_path,
):
nfiles += 1
if view == "one-at-a-time":
display_errors(path, errors)
all_files_errors[path] = errors

if view == "groupped":
# TODO: Most likely we want to summarize errors across files since they
# are likely to be similar
# TODO: add our own criteria for validation (i.e. having needed metadata)

# # can't be done since fails to compare different types of errors
# all_errors = sum(errors.values(), [])
# all_error_types = []
# errors_unique = sorted(set(all_errors))
# from collections import Counter
# # Let's make it
# print(
# "{} unique errors in {} files".format(
# len(errors_unique), len(errors))
# )
raise NotImplementedError("TODO")

files_with_errors = [f for f, errors in all_files_errors.items() if errors]

if files_with_errors:
click.secho(
"Summary: Validation errors in {} out of {} files".format(
len(files_with_errors), nfiles
),
bold=True,
fg="red",
)

_process_issues(validator_result, grouping)


def _process_issues(validator_result, grouping):

issues = [i for i in validator_result if i.severity]

purviews = [
list(filter(bool, [i.path, i.path_regex, i.dataset_path]))[0] for i in issues
]
if grouping == "none":
display_errors(
purviews,
[i.id for i in issues],
[i.severity for i in issues],
[i.message for i in issues],
)
raise SystemExit(1)
elif grouping == "path":
for purview in purviews:
applies_to = [
i for i in issues if purview in [i.path, i.path_regex, i.dataset_path]
]
display_errors(
[purview],
[i.id for i in applies_to],
[i.severity for i in applies_to],
[i.message for i in applies_to],
)
else:
click.secho(
"Summary: No validation errors among {} file(s)".format(nfiles),
bold=True,
fg="green",
raise NotImplementedError(
"The `grouping` parameter values currently supported are " "path or None."
)

validation_errors = [i for i in issues if i.severity == Severity.ERROR]

def display_errors(path, errors):
if not errors:
lgr.info("%s: ok", path)
if validation_errors:
raise SystemExit(1)
else:
click.secho("No errors found.", fg="green")


def _get_severity_color(severities):

if Severity.ERROR in severities:
return "red"
elif Severity.WARNING in severities:
return "yellow"
else:
return "blue"


def display_errors(
purviews: List[str],
errors: List[str],
severities: List[Severity],
messages: List[str],
) -> None:
"""
Unified error display for validation CLI, which auto-resolves grouping logic based on
the length of input lists.
Notes
-----
* There is a bit of roundabout and currently untestable logic to deal with potential cases
where the same error has multiple error message, could be removed in the future and removed
by assert if this won't ever be the case.
"""

if all(len(cast(list, i)) == 1 for i in [purviews, errors, severities, messages]):
fg = _get_severity_color(severities)
error_message = f"[{errors[0]}] {purviews[0]}{messages[0]}"
click.secho(error_message, fg=fg)
elif len(purviews) == 1:
group_message = f"{purviews[0]}: {pluralize(len(errors), 'issue')} detected."
fg = _get_severity_color(severities)
click.secho(group_message, fg=fg)
for error, severity, message in zip(errors, severities, messages):
error_message = f" [{error}] {message}"
fg = _get_severity_color([severity])
click.secho(error_message, fg=fg)
elif len(errors) == 1:
fg = _get_severity_color(severities)
group_message = (
f"{errors[0]}: detected in {pluralize(len(purviews), 'purviews')}"
)
if len(set(messages)) == 1:
error_message += f" — {messages[0]}."
click.secho(group_message, fg=fg)
for purview, severity in zip(purviews, severities):
error_message = f" {purview}"
fg = _get_severity_color([severity])
click.secho(error_message, fg=fg)
else:
error_message += "."
for purview, severity, message in zip(purviews, severities, messages):
error_message = f" {purview}{message}"
fg = _get_severity_color([severity])
click.secho(error_message, fg=fg)
else:
lgr.error("%s: %d error(s)", path, len(errors))
for error in errors:
lgr.error(" %s", error)
for purview, error, severity, message in zip(
purviews, errors, severities, messages
):
fg = _get_severity_color([severity])
error_message = f"[{error}] {purview}{message}"
click.secho(error_message, fg=fg)
Loading

0 comments on commit 48a222f

Please sign in to comment.