Add composite file parser

sco1 · sco1 · commit f7cd5b6248fe · 2021-05-25T14:17:38.000-04:00
diff --git a/pytest.ini b/pytest.ini
@@ -0,0 +1,9 @@
+[pytest]
+minversion = 6.0
+testpaths =
+    tests
+addopts =
+    --cov=src
+    --cov=tests
+    --cov-branch
+    --cov-report term-missing:skip-covered
diff --git a/src/parser.py b/src/parser.py
@@ -0,0 +1,47 @@
+def split_composite_file(composite_src: list[str]) -> tuple[list[str], list[str]]:
+    """
+    Split composite data file into its components.
+
+    A composite data file is assumed to contain 3 chunks of data:
+        1. Core measurements
+        2. Custom measurements
+        3. Landmark coordinates
+
+    Core and custom measurements are joined into a single list of anthro measurements, which is
+    returned along with the list of landmark rows.
+
+    Each chunk is preceded by one or more header lines, which start with `#`. All header lines
+    are discarded; header lines trailing the final chunk do not start a new chunk.
+
+    A leading validity flag (`1  ` or `0  `) is stripped from each data row. Rows that then start
+    with `*` are assumed to be comments and are discarded.
+
+    A `ValueError` is raised if the source does not contain exactly 3 chunks, e.g. if it is empty.
+    """
+    out_chunks: list[list[str]] = []
+    in_header = True  # File is assumed to start with a header
+    for line in composite_src:
+        if line.startswith("#"):
+            in_header = True
+            continue
+
+        if in_header:
+            # The first data row after a run of header lines opens a new chunk
+            out_chunks.append([])
+            in_header = False
+
+        # All non-comment lines are assumed to lead off with a validity flag (0 or 1) that we can
+        # strip off, if present
+        line = line.removeprefix("1  ").removeprefix("0  ")
+        if line.startswith("*"):
+            # Discard comments
+            continue
+
+        out_chunks[-1].append(line)
+
+    # Fail with a clear message rather than mis-assigning chunks if a section is missing or extra
+    if len(out_chunks) != 3:
+        raise ValueError(f"Expected 3 chunks (core, custom, landmark), found {len(out_chunks)}")
+
+    core, custom, landmark = out_chunks
+    return [*core, *custom], landmark
diff --git a/tests/test_parser.py b/tests/test_parser.py
@@ -0,0 +1,88 @@
+from textwrap import dedent
+
+import pytest
+from src import parser
+
+
+COMPOSITE_TEST_CASES = [  # Each case: (raw file text, expected anthro rows, expected landmarks)
+    (
+        dedent(  # Headers & validity prefixes are stripped; core + custom rows join into anthro
+            """\
+            #SizeStream Measurements
+            #Stored on Tue May 18 06:49:24 2021
+            #SizeStream Core Measurements
+            #format - Measurement Valid (1 = valid), Measurement Name, Measurement
+            #
+            1  Actual Weight: 1.2
+            #SizeStream Custom Measurements
+            #format - Measurement Valid (1 = valid), Measurement Name, Measurement
+            #
+            1  Chest: 3.4
+            #SizeStream Landmarks
+            #format - Landmarks Valid (1 = valid), Landmark Name, Landmark x y z
+            #
+            1  AbdomenBack	5.6	7.8	-9.10
+            """
+        ),
+        ["Actual Weight: 1.2", "Chest: 3.4"],
+        ["AbdomenBack	5.6	7.8	-9.10"],
+    ),
+    (
+        dedent(  # A row that starts with `*` after its validity flag is a comment & is dropped
+            """\
+            #SizeStream Measurements
+            #Stored on Tue May 18 06:49:24 2021
+            #SizeStream Core Measurements
+            #format - Measurement Valid (1 = valid), Measurement Name, Measurement
+            #
+            1  Actual Weight: 1.2
+            #SizeStream Custom Measurements
+            #format - Measurement Valid (1 = valid), Measurement Name, Measurement
+            #
+            1  *****  Body Fat / Fitness: *****
+            1  Chest: 3.4
+            #SizeStream Landmarks
+            #format - Landmarks Valid (1 = valid), Landmark Name, Landmark x y z
+            #
+            1  AbdomenBack	5.6	7.8	-9.10
+            """
+        ),
+        ["Actual Weight: 1.2", "Chest: 3.4"],
+        ["AbdomenBack	5.6	7.8	-9.10"],
+    ),
+    (
+        dedent(  # Names containing spaces, `%`, or parentheses pass through unchanged
+            """\
+            #SizeStream Measurements
+            #Stored on Tue May 18 06:49:24 2021
+            #SizeStream Core Measurements
+            #format - Measurement Valid (1 = valid), Measurement Name, Measurement
+            #
+            1  Actual Weight: 1.2
+            1  Waist at 50%: 1.2
+            #SizeStream Custom Measurements
+            #format - Measurement Valid (1 = valid), Measurement Name, Measurement
+            #
+            1  Chest: 3.4
+            1  Body Fat (men): 3.4
+            #SizeStream Landmarks
+            #format - Landmarks Valid (1 = valid), Landmark Name, Landmark x y z
+            #
+            1  AbdomenBack	5.6	7.8	-9.10
+            1  Small of the Back	5.6	7.8	-9.10
+            """
+        ),
+        ["Actual Weight: 1.2", "Waist at 50%: 1.2", "Chest: 3.4", "Body Fat (men): 3.4"],
+        ["AbdomenBack	5.6	7.8	-9.10", "Small of the Back	5.6	7.8	-9.10"],
+    ),
+]
+
+
+@pytest.mark.parametrize(("raw_src", "truth_anthro", "truth_landmark"), COMPOSITE_TEST_CASES)
+def test_composite_file_parsing(
+    raw_src: str, truth_anthro: list[str], truth_landmark: list[str]
+) -> None:
+    """Check that each sample composite file splits into the expected rows."""
+    anthro_rows, landmark_rows = parser.split_composite_file(raw_src.splitlines())
+    assert anthro_rows == truth_anthro
+    assert landmark_rows == truth_landmark