Skip to content

Commit 57d7e1f

Browse files
update test cases
1 parent 6ce8d81 commit 57d7e1f

File tree

1 file changed

+61
-212
lines changed

1 file changed

+61
-212
lines changed

tests/test_extract_fhir.py

Lines changed: 61 additions & 212 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from __future__ import annotations
44

55
import platform
6+
import random
67

78
from typing import Literal
89

@@ -14,7 +15,8 @@
1415
IS_OS_MACOS = platform.system().lower() == "darwin"
1516

1617
API_MAP = {
17-
"openai": "api_key_openai",
18+
"openai": "openai_api_key",
19+
"ollama": "",
1820
}
1921

2022
CI_BACKEND = ["ollama"]
@@ -28,69 +30,56 @@ def test_fixture(transcript_1: str, openai_api_key: str) -> None:
2830

2931

3032
@pytest.mark.skip_on_ci
31-
def test_check_fhir_resources(transcript_1: str, openai_api_key: str) -> None:
32-
"""Test FHIR resources are found correctly."""
33-
aai = AnamnesisAI(backend="openai", api_key=openai_api_key)
34-
fhir_resources = aai._check_possible_fhir_resources(transcript_1)
35-
assert fhir_resources
36-
assert isinstance(fhir_resources, FHIRResourceFoundModel)
37-
assert fhir_resources.Patient
38-
assert fhir_resources.FamilyMemberHistory
39-
assert fhir_resources.AllergyIntolerance
40-
assert fhir_resources.Encounter
33+
@pytest.mark.parametrize("backend", NO_CI_BACKEND)
34+
def test_random_transcript_file_extraction(
35+
list_of_files: list[str],
36+
openai_api_key: str,
37+
backend: Literal["openai", "ollama"],
38+
) -> None:
39+
"""Test FHIR extraction on a randomly selected transcript file."""
40+
assert len(list_of_files) > 0, "No transcript files found for testing"
41+
42+
random_file = random.choice(list_of_files)
43+
print(f"Testing random transcript file: {random_file}")
44+
45+
with open(random_file, "r") as f:
46+
transcript_content = f.read()
47+
48+
api_key = openai_api_key if backend == "openai" else ""
49+
aai = AnamnesisAI(backend=backend, api_key=api_key)
50+
51+
fhir_resources, invalid_fhir_resources = aai.extract_fhir(
52+
transcript_content
53+
)
54+
55+
assert len(fhir_resources) > 0, (
56+
f"Expected at least one FHIR resource from {random_file}"
57+
)
4158

4259

4360
@pytest.mark.skip_on_ci
44-
def test_extract_fhir(transcript_1: str, openai_api_key: str) -> None:
61+
@pytest.mark.parametrize("backend", NO_CI_BACKEND)
62+
def test_extract_fhir(
63+
transcript_1: str,
64+
openai_api_key: str,
65+
backend: Literal["openai", "ollama"],
66+
) -> None:
4567
"""Test FHIR resources are extracted correctly."""
46-
aai = AnamnesisAI(backend="openai", api_key=openai_api_key)
68+
api_key = openai_api_key if backend == "openai" else ""
69+
aai = AnamnesisAI(backend=backend, api_key=api_key)
4770
fhir_resources, invalid_fhir_resources = aai.extract_fhir(transcript_1)
4871

4972
assert len(fhir_resources) > 0, (
5073
"Expected at least one resource in FHIR output"
5174
)
5275

53-
# Check for specific resource types
5476
found_types = {resource.__class__.__name__ for resource in fhir_resources}
5577
required_types = {"Patient", "FamilyMemberHistory", "AllergyIntolerance"}
5678

57-
assert required_types.issubset(found_types), (
58-
"Missing required resource types. Expected "
59-
"{required_types}, found {found_types}"
60-
)
61-
62-
63-
def _check_fhir_resources(
64-
text: str, backend: Literal["openai", "ollama"]
65-
) -> bool:
66-
"""Test if fhir resources prsent in the text."""
67-
print(f">>> {backend}")
68-
69-
api_key_name: str = API_MAP.get(backend, "")
70-
api_key = locals().get(api_key_name, "")
71-
72-
aai = AnamnesisAI(backend=backend, api_key=api_key)
73-
fhir_data = aai._check_possible_fhir_resources(text)
74-
75-
assert fhir_data
76-
assert isinstance(fhir_data, FHIRResourceFoundModel)
77-
assert fhir_data.Patient is True
78-
assert fhir_data.Condition is False
79-
# note: improve prompt to have better answer
80-
# assert fhir_data.Practitioner is True
81-
# assert fhir_data.FamilyMemberHistory is False
82-
# assert fhir_data.AllergyIntolerance is False
83-
# assert fhir_data.Immunization is False
84-
# assert fhir_data.Procedure is False
85-
# assert fhir_data.CarePlan is False
86-
# assert fhir_data.Encounter is True
87-
# assert fhir_data.Observation is True
88-
# assert fhir_data.MedicationStatement is True
89-
# assert fhir_data.DiagnosticReport is True
90-
# assert fhir_data.ServiceRequest is False
91-
# assert fhir_data.MedicationRequest is False
92-
93-
return True
79+
assert required_types.issubset(
80+
found_types
81+
), f"""Missing required resource types. Expected {required_types},
82+
found {found_types}"""
9483

9584

9685
@pytest.mark.skip_on_ci
@@ -99,176 +88,36 @@ def _check_fhir_resources(
9988
def test_check_fhir_resources_ci(
10089
transcript_1: str, backend: Literal["openai", "ollama"]
10190
) -> None:
102-
"""Test if fhir resources prsent in the text."""
103-
assert _check_fhir_resources(transcript_1, backend)
104-
105-
106-
@pytest.mark.skip_on_ci
107-
@pytest.mark.parametrize("backend", NO_CI_BACKEND)
108-
def test_check_fhir_resources_no_ci(
109-
transcript_1: str, backend: Literal["openai", "ollama"]
110-
) -> None:
111-
"""Test if fhir resources prsent in the text."""
112-
assert _check_fhir_resources(transcript_1, backend)
91+
"""Test if FHIR resources are present in the text for CI environments."""
92+
aai = AnamnesisAI(backend=backend, api_key="")
93+
fhir_data = aai._check_possible_fhir_resources(transcript_1)
11394

114-
115-
def _check_transcript_1(
116-
text: str, backend: Literal["openai", "ollama"]
117-
) -> bool:
118-
"""Test if transcript 1."""
119-
print(f">>> {backend}")
120-
121-
api_key_name: str = API_MAP.get(backend, "")
122-
api_key = locals().get(api_key_name, "")
123-
124-
aai = AnamnesisAI(backend=backend, api_key=api_key)
125-
fhir_data = aai.extract_fhir(text)
12695
assert fhir_data
127-
assert isinstance(fhir_data, dict)
128-
assert len(fhir_data)
129-
130-
return True
131-
132-
133-
@pytest.mark.skip_on_ci
134-
@pytest.mark.skipif(IS_OS_MACOS, reason="ollama is not working on macos")
135-
@pytest.mark.parametrize("backend", CI_BACKEND)
136-
def test_transcript_1(
137-
transcript_1: str, backend: Literal["openai", "ollama"]
138-
) -> None:
139-
"""Test if transcript 1."""
140-
assert _check_transcript_1(transcript_1, backend)
141-
142-
143-
@pytest.mark.skip_on_ci
144-
@pytest.mark.parametrize("backend", NO_CI_BACKEND)
145-
def test_transcript_1_no_ci(
146-
transcript_1: str, backend: Literal["openai", "ollama"]
147-
) -> None:
148-
"""Test if transcript 1."""
149-
assert _check_transcript_1(transcript_1, backend)
150-
151-
152-
def _check_synthetic_files(
153-
synthetic_files_content: dict[str, str],
154-
backend: Literal["openai", "ollama"],
155-
) -> bool:
156-
"""Test if each synthetic data file can be processed."""
157-
assert len(synthetic_files_content) > 0
158-
159-
api_key_name: str = API_MAP.get(backend, "")
160-
api_key = locals().get(api_key_name, "")
161-
162-
aai = AnamnesisAI(backend=backend, api_key=api_key)
163-
164-
for filename, content in synthetic_files_content.items():
165-
print(f"Testing synthetic file: {filename}")
166-
fhir_data = aai.extract_fhir(content)
167-
print(f">>> {backend}")
168-
assert fhir_data is not None
169-
assert fhir_data
170-
assert isinstance(fhir_data, dict)
171-
assert len(fhir_data) >= 0
172-
assert len(fhir_data)
173-
174-
return True
96+
assert isinstance(fhir_data, FHIRResourceFoundModel)
97+
assert fhir_data.Patient is True
98+
assert fhir_data.FamilyMemberHistory is True
17599

176100

177101
@pytest.mark.skip_on_ci
178102
@pytest.mark.skipif(IS_OS_MACOS, reason="ollama is not working on macos")
179103
@pytest.mark.parametrize("backend", CI_BACKEND)
180-
def test_synthetic_files_ci(
181-
synthetic_files_content: dict[str, str],
182-
backend: Literal["openai", "ollama"],
183-
) -> None:
184-
"""Test if each synthetic data file can be processed."""
185-
assert _check_synthetic_files(synthetic_files_content, backend)
186-
187-
188-
@pytest.mark.skip_on_ci
189-
@pytest.mark.parametrize("backend", NO_CI_BACKEND)
190-
def test_synthetic_files_no_ci(
191-
synthetic_files_content: dict[str, str],
192-
backend: Literal["openai", "ollama"],
104+
def test_random_transcript_file_extraction_ci(
105+
list_of_files: list[str], backend: Literal["openai", "ollama"]
193106
) -> None:
194-
"""Test if each synthetic data file can be processed."""
195-
assert _check_synthetic_files(synthetic_files_content, backend)
196-
197-
107+
"""Test FHIR extraction on a randomly selected transcript file for CI."""
108+
assert len(list_of_files) > 0, "No transcript files found for testing"
198109

199-
@pytest.fixture
200-
def conversation_text() -> str:
201-
"""Fixture to read the conversation text from the file."""
202-
file_path = "tests/data/synthetic/enhanced_conversation.txt"
203-
try:
204-
with open(file_path, "r") as file:
205-
return file.read()
206-
except FileNotFoundError:
207-
raise FileNotFoundError(
208-
f"Could not find the conversation file: {file_path}"
209-
)
210-
211-
212-
@pytest.mark.skip_on_ci
213-
def test_transcript_extraction_structure(
214-
conversation_text: str, api_key: str
215-
) -> None:
216-
"""Test basic structure of extracted FHIR data."""
217-
fhir_data = extract_fhir(conversation_text, api_key)
218-
assert fhir_data
219-
assert isinstance(fhir_data, dict)
220-
assert len(fhir_data) > 0
110+
random_file = random.choice(list_of_files)
111+
print(f"Testing random transcript file: {random_file}")
221112

222-
resources = fhir_data.get("resources", fhir_data.get("entry", []))
223-
assert len(resources) > 0, "Expected at least one resource in FHIR output"
113+
with open(random_file, "r") as f:
114+
transcript_content = f.read()
224115

225-
for resource in resources:
226-
assert "resourceType" in resource, (
227-
"Expected 'resourceType' in each resource"
228-
)
116+
aai = AnamnesisAI(backend=backend, api_key="")
229117

230-
231-
@pytest.mark.skip_on_ci
232-
def test_extracted_fhir_resources(
233-
conversation_text: str, api_key: str
234-
) -> None:
235-
"""Confirm correct extraction of expected FHIR resources from text."""
236-
fhir_data = extract_fhir(conversation_text, api_key)
237-
print("fhir suggested: ", fhir_data)
238-
239-
resources = fhir_data.get("resources", fhir_data.get("entry", []))
240-
241-
patient_count = 0
242-
condition_count = 0
243-
observation_count = 0
244-
medication_statement_count = 0
245-
medication_request_count = 0
246-
service_request_count = 0
247-
248-
for resource in resources:
249-
if resource["resourceType"] == "Patient":
250-
patient_count += 1
251-
elif resource["resourceType"] == "Condition":
252-
condition_count += 1
253-
elif resource["resourceType"] == "Observation":
254-
observation_count += 1
255-
elif resource["resourceType"] == "MedicationStatement":
256-
medication_statement_count += 1
257-
elif resource["resourceType"] == "MedicationRequest":
258-
medication_request_count += 1
259-
elif resource["resourceType"] == "ServiceRequest":
260-
service_request_count += 1
261-
262-
# Assert that the expected number of each resource type is found
263-
# assert patient_count == 1
264-
assert condition_count >= 1 # Expect at least one condition (hypertension)
265-
assert (
266-
observation_count >= 2
267-
) # Expect at least observations (BP, temp, knee pain, swelling)
268-
assert (
269-
medication_statement_count >= 1
270-
) # Expect at least two MedicationStatements (Lisinopril, Ibuprofen)
271-
assert (
272-
medication_request_count >= 1
273-
) # Expect one MedicationRequest (Lisinopril refill)
274-
assert service_request_count >= 1 # Expect one ServiceRequest (X-ray)
118+
fhir_resources, invalid_fhir_resources = aai.extract_fhir(
119+
transcript_content
120+
)
121+
assert len(fhir_resources) > 0, (
122+
f"Expected at least one FHIR resource from {random_file}"
123+
)

0 commit comments

Comments
 (0)