Skip to content
This repository was archived by the owner on Sep 14, 2022. It is now read-only.

Commit f7cd5b6

Browse files
committed
Add composite file parser
1 parent 957e322 commit f7cd5b6

File tree

3 files changed

+144
-0
lines changed

3 files changed

+144
-0
lines changed

pytest.ini

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
[pytest]
2+
minversion = 6.0
3+
testpaths =
4+
tests
5+
addopts =
6+
--cov=src
7+
--cov=tests
8+
--cov-branch
9+
--cov-report term-missing:skip-covered

src/parser.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
def split_composite_file(composite_src: list[str]) -> tuple[list[str], list[str]]:
    """
    Split composite data file into its components.

    A composite data file is assumed to contain 3 chunks of data, in this order:
        1. Core measurements
        2. Custom measurements
        3. Landmark coordinates

    Core and custom measurements are joined into a single list of anthro measurements.

    Each section is assumed to be preceded by one or more header lines, which start with `#`. All
    header lines are discarded; a run of consecutive header lines counts as a single separator.
    Header lines trailing the final section are tolerated and ignored.

    Data rows may lead off with a single validity flag (`0 ` or `1 `), which is stripped. Rows
    whose remaining content starts with `*` are assumed to be comments and are discarded.

    :param composite_src: Lines of the composite file, without line terminators.
    :return: A tuple of `(anthro, landmark)` row lists.
    :raises ValueError: If the source does not contain exactly 3 data sections.
    """
    out_chunks: list[list[str]] = []
    chunk: list[str] = []
    in_header = True  # File is assumed to start with a header
    for line in composite_src:
        if line.startswith("#"):
            # Only the first header line after a run of data rows closes the current chunk;
            # subsequent header lines belong to the same separator
            if not in_header:
                out_chunks.append(chunk)
                chunk = []
                in_header = True
            continue

        in_header = False

        # Strip at most one leading validity flag so that data which itself begins with "0 " or
        # "1 " is left intact
        if line.startswith(("1 ", "0 ")):
            line = line[2:]

        if line.startswith("*"):
            # Discard comments
            continue

        chunk.append(line)

    # Flush the last chunk only if the file did not end on a header line, otherwise it has
    # already been stored
    if not in_header:
        out_chunks.append(chunk)

    if len(out_chunks) != 3:
        raise ValueError(
            "Expected 3 data sections (core, custom, landmark) in composite file, "
            f"found {len(out_chunks)}"
        )

    core, custom, landmark = out_chunks
    anthro = [*core, *custom]

    return anthro, landmark

tests/test_parser.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
from textwrap import dedent
2+
3+
import pytest
4+
from src import parser
5+
6+
7+
# Parametrized fixtures for the composite file parser test. Each entry is a 3-tuple of:
#   1. The raw composite file contents as a single string
#   2. The anthro rows expected back from the parser (core rows followed by custom rows)
#   3. The landmark rows expected back from the parser
COMPOSITE_TEST_CASES = [
    (
        dedent(  # Check that headers, line prefixes are discarded, anthro is joined
            """\
            #SizeStream Measurements
            #Stored on Tue May 18 06:49:24 2021
            #SizeStream Core Measurements
            #format - Measurement Valid (1 = valid), Measurement Name, Measurement
            #
            1 Actual Weight: 1.2
            #SizeStream Custom Measurements
            #format - Measurement Valid (1 = valid), Measurement Name, Measurement
            #
            1 Chest: 3.4
            #SizeStream Landmarks
            #format - Landmarks Valid (1 = valid), Landmark Name, Landmark x y z
            #
            1 AbdomenBack 5.6 7.8 -9.10
            """
        ),
        ["Actual Weight: 1.2", "Chest: 3.4"],
        ["AbdomenBack 5.6 7.8 -9.10"],
    ),
    (
        dedent(  # Check that comments are discarded
            """\
            #SizeStream Measurements
            #Stored on Tue May 18 06:49:24 2021
            #SizeStream Core Measurements
            #format - Measurement Valid (1 = valid), Measurement Name, Measurement
            #
            1 Actual Weight: 1.2
            #SizeStream Custom Measurements
            #format - Measurement Valid (1 = valid), Measurement Name, Measurement
            #
            1 ***** Body Fat / Fitness: *****
            1 Chest: 3.4
            #SizeStream Landmarks
            #format - Landmarks Valid (1 = valid), Landmark Name, Landmark x y z
            #
            1 AbdomenBack 5.6 7.8 -9.10
            """
        ),
        ["Actual Weight: 1.2", "Chest: 3.4"],
        ["AbdomenBack 5.6 7.8 -9.10"],
    ),
    (
        # Names here include spaces, a percent sign and parentheses
        dedent(  # Check all encountered name varieties
            """\
            #SizeStream Measurements
            #Stored on Tue May 18 06:49:24 2021
            #SizeStream Core Measurements
            #format - Measurement Valid (1 = valid), Measurement Name, Measurement
            #
            1 Actual Weight: 1.2
            1 Waist at 50%: 1.2
            #SizeStream Custom Measurements
            #format - Measurement Valid (1 = valid), Measurement Name, Measurement
            #
            1 Chest: 3.4
            1 Body Fat (men): 3.4
            #SizeStream Landmarks
            #format - Landmarks Valid (1 = valid), Landmark Name, Landmark x y z
            #
            1 AbdomenBack 5.6 7.8 -9.10
            1 Small of the Back 5.6 7.8 -9.10
            """
        ),
        ["Actual Weight: 1.2", "Waist at 50%: 1.2", "Chest: 3.4", "Body Fat (men): 3.4"],
        ["AbdomenBack 5.6 7.8 -9.10", "Small of the Back 5.6 7.8 -9.10"],
    ),
]
79+
80+
81+
@pytest.mark.parametrize(("raw_src", "truth_anthro", "truth_landmark"), COMPOSITE_TEST_CASES)
def test_composite_file_parsing(  # noqa: D103
    raw_src: str, truth_anthro: list[str], truth_landmark: list[str]
) -> None:
    # The parser consumes a list of lines, so break the raw fixture text apart first
    source_lines = raw_src.splitlines()
    parsed_anthro, parsed_landmark = parser.split_composite_file(source_lines)

    # Both outputs must match the expected rows exactly, including order
    assert parsed_anthro == truth_anthro
    assert parsed_landmark == truth_landmark

0 commit comments

Comments
 (0)