Merge pull request #1104 from dandi/gh-943

Structured validation results
dandi · Oct 28, 2022 · 48a222f · 48a222f
2 parents 1e43339 + 6205109
commit 48a222f
Show file tree

Hide file tree

Showing 13 changed files with 773 additions and 336 deletions.
diff --git a/dandi/bids_utils.py b/dandi/bids_utils.py
diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py
@@ -1,14 +1,17 @@
 import logging
 import os
+from typing import List, cast
 
 import click
 
-from .base import devel_debug_option, devel_option, lgr, map_to_click_exceptions
+from .base import devel_debug_option, devel_option, map_to_click_exceptions
+from ..utils import pluralize
+from ..validate_types import Severity
 
 
 @click.command()
 @click.option(
-    "--schema", help="Validate against new BIDS schema version", metavar="VERSION"
+    "--schema", help="Validate against new BIDS schema version.", metavar="VERSION"
 )
 @click.option(
     "--report-path",
@@ -20,13 +23,21 @@
     is_flag=True,
     help="Whether to write a report under a unique path in the DANDI log directory.",
 )
+@click.option(
+    "--grouping",
+    "-g",
+    help="How to group error/warning reporting.",
+    type=click.Choice(["none", "path"], case_sensitive=False),
+    default="none",
+)
 @click.argument("paths", nargs=-1, type=click.Path(exists=True, dir_okay=True))
 @map_to_click_exceptions
 def validate_bids(
     paths,
     schema,
     report,
     report_path,
+    grouping="none",
 ):
     """Validate BIDS paths.
 
@@ -36,20 +47,16 @@ def validate_bids(
         dandi validate-bids /my/path
     """
 
-    from ..bids_utils import is_valid, report_errors
     from ..validate import validate_bids as validate_bids_
 
-    validator_result = validate_bids_(
+    validator_result = validate_bids_(  # Controller
         *paths,
         report=report,
         report_path=report_path,
         schema_version=schema,
     )
-    valid = is_valid(validator_result)
-    report_errors(validator_result)
 
-    if not valid:
-        raise SystemExit(1)
+    _process_issues(validator_result, grouping)
 
 
 @click.command()
@@ -60,13 +67,32 @@ def validate_bids(
     default=False,
     is_flag=True,
 )
+@click.option(
+    "--grouping",
+    "-g",
+    help="How to group error/warning reporting.",
+    type=click.Choice(["none", "path"], case_sensitive=False),
+    default="none",
+)
 @click.argument("paths", nargs=-1, type=click.Path(exists=True, dir_okay=True))
 @devel_debug_option()
 @map_to_click_exceptions
-def validate(paths, schema=None, devel_debug=False, allow_any_path=False):
+def validate(
+    paths,
+    schema=None,
+    devel_debug=False,
+    allow_any_path=False,
+    grouping="none",
+):
     """Validate files for NWB and DANDI compliance.
 
     Exits with non-0 exit code if any file is not compliant.
+
+    Parameters
+    ----------
+
+    grouping : str, optional
+        A string which is either "", "error", or "path" — "error" not yet implemented.
     """
     from ..pynwb_utils import ignore_benign_pynwb_warnings
     from ..validate import validate as validate_
@@ -84,61 +110,117 @@ def validate(paths, schema=None, devel_debug=False, allow_any_path=False):
     # at this point, so we ignore it although ideally there should be a formal
     # way to get relevant warnings (not errors) from PyNWB
     ignore_benign_pynwb_warnings()
-    view = "one-at-a-time"  # TODO: rename, add groupped
 
-    all_files_errors = {}
-    nfiles = 0
-    for path, errors in validate_(
+    validator_result = validate_(
         *paths,
         schema_version=schema,
         devel_debug=devel_debug,
         allow_any_path=allow_any_path,
-    ):
-        nfiles += 1
-        if view == "one-at-a-time":
-            display_errors(path, errors)
-        all_files_errors[path] = errors
-
-    if view == "groupped":
-        # TODO: Most likely we want to summarize errors across files since they
-        # are likely to be similar
-        # TODO: add our own criteria for validation (i.e. having needed metadata)
-
-        # # can't be done since fails to compare different types of errors
-        # all_errors = sum(errors.values(), [])
-        # all_error_types = []
-        # errors_unique = sorted(set(all_errors))
-        # from collections import Counter
-        # # Let's make it
-        # print(
-        #     "{} unique errors in {} files".format(
-        #     len(errors_unique), len(errors))
-        # )
-        raise NotImplementedError("TODO")
-
-    files_with_errors = [f for f, errors in all_files_errors.items() if errors]
-
-    if files_with_errors:
-        click.secho(
-            "Summary: Validation errors in {} out of {} files".format(
-                len(files_with_errors), nfiles
-            ),
-            bold=True,
-            fg="red",
+    )
+
+    _process_issues(validator_result, grouping)
+
+
+def _process_issues(validator_result, grouping):
+
+    issues = [i for i in validator_result if i.severity]
+
+    purviews = [
+        list(filter(bool, [i.path, i.path_regex, i.dataset_path]))[0] for i in issues
+    ]
+    if grouping == "none":
+        display_errors(
+            purviews,
+            [i.id for i in issues],
+            [i.severity for i in issues],
+            [i.message for i in issues],
         )
-        raise SystemExit(1)
+    elif grouping == "path":
+        for purview in purviews:
+            applies_to = [
+                i for i in issues if purview in [i.path, i.path_regex, i.dataset_path]
+            ]
+            display_errors(
+                [purview],
+                [i.id for i in applies_to],
+                [i.severity for i in applies_to],
+                [i.message for i in applies_to],
+            )
     else:
-        click.secho(
-            "Summary: No validation errors among {} file(s)".format(nfiles),
-            bold=True,
-            fg="green",
+        raise NotImplementedError(
+            "The `grouping` parameter values currently supported are " "path or None."
         )
 
+    validation_errors = [i for i in issues if i.severity == Severity.ERROR]
 
-def display_errors(path, errors):
-    if not errors:
-        lgr.info("%s: ok", path)
+    if validation_errors:
+        raise SystemExit(1)
+    else:
+        click.secho("No errors found.", fg="green")
+
+
+def _get_severity_color(severities):
+
+    if Severity.ERROR in severities:
+        return "red"
+    elif Severity.WARNING in severities:
+        return "yellow"
+    else:
+        return "blue"
+
+
+def display_errors(
+    purviews: List[str],
+    errors: List[str],
+    severities: List[Severity],
+    messages: List[str],
+) -> None:
+    """
+    Unified error display for validation CLI, which auto-resolves grouping logic based on
+    the length of input lists.
+
+
+    Notes
+    -----
+    * There is a bit of roundabout and currently untestable logic to deal with potential cases
+    where the same error has multiple error message, could be removed in the future and removed
+    by assert if this won't ever be the case.
+    """
+
+    if all(len(cast(list, i)) == 1 for i in [purviews, errors, severities, messages]):
+        fg = _get_severity_color(severities)
+        error_message = f"[{errors[0]}] {purviews[0]} — {messages[0]}"
+        click.secho(error_message, fg=fg)
+    elif len(purviews) == 1:
+        group_message = f"{purviews[0]}: {pluralize(len(errors), 'issue')} detected."
+        fg = _get_severity_color(severities)
+        click.secho(group_message, fg=fg)
+        for error, severity, message in zip(errors, severities, messages):
+            error_message = f"  [{error}] {message}"
+            fg = _get_severity_color([severity])
+            click.secho(error_message, fg=fg)
+    elif len(errors) == 1:
+        fg = _get_severity_color(severities)
+        group_message = (
+            f"{errors[0]}: detected in {pluralize(len(purviews), 'purviews')}"
+        )
+        if len(set(messages)) == 1:
+            error_message += f" — {messages[0]}."
+            click.secho(group_message, fg=fg)
+            for purview, severity in zip(purviews, severities):
+                error_message = f"  {purview}"
+                fg = _get_severity_color([severity])
+                click.secho(error_message, fg=fg)
+        else:
+            error_message += "."
+            for purview, severity, message in zip(purviews, severities, messages):
+                error_message = f"  {purview} — {message}"
+                fg = _get_severity_color([severity])
+                click.secho(error_message, fg=fg)
     else:
-        lgr.error("%s: %d error(s)", path, len(errors))
-        for error in errors:
-            lgr.error("  %s", error)
+        for purview, error, severity, message in zip(
+            purviews, errors, severities, messages
+        ):
+            fg = _get_severity_color([severity])
+            error_message = f"[{error}] {purview} — {message}"
+            click.secho(error_message, fg=fg)