Skip to content

Commit

Permalink
Simplifying data flow
Browse files Browse the repository at this point in the history
  • Loading branch information
pnrobinson committed Jun 22, 2024
1 parent 7080c98 commit 9a97842
Show file tree
Hide file tree
Showing 29 changed files with 468 additions and 290 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

<groupId>org.monarchinitiative</groupId>
<artifactId>phenopacket2prompt</artifactId>
<version>0.4.3</version>
<version>0.4.4</version>

<name>phenopacket2prompt</name>
<url>https://github.com/monarch-initiative/phenopacket2prompt</url>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,6 @@ public final class HpoOnsetAge implements PhenopacketAge {
private final int totalDays;


/** Late onset HP:0003584 */
private final static TermId lateAgeOnset = TermId.of("HP:0003584");

/* Middle age onset HP:0003596 */
private final static TermId middleAgeOnset = TermId.of("HP:0003596");

/** Young adult onset HP:0011462; Early young adult onset HP:0025708; Intermediate young adult onset HP:0025709;
* Late young adult onset HP:0025710 */
private final static Set<TermId> youngAdultIds = Set.of(TermId.of("HP:0011462"),
TermId.of("HP:0025708"),
TermId.of("HP:0025709"),
TermId.of("HP:0025710"));
/** All other adult terms, e.g., Adult onset HP:0003581 */
public final static Set<TermId> otherAdult = Set.of(middleAgeOnset, lateAgeOnset,
TermId.of("HP:0003581"));

/**
* One of Antenatal onset HP:0030674; Fetal onset HP:0011461; Late first trimester onset HP:0034199;
* Third trimester onset HP:0034197; Second trimester onset HP:0034198; Embryonal onset HP:0011460
Expand Down Expand Up @@ -72,7 +56,9 @@ public static HpoOnsetAge juvenile() {
/** Late onset HP:0003584 */
private final static TermId lateOnset = TermId.of("HP:0003584");
private final static Set<TermId> adultTermIds = Set.of(adultOnset, youngAdultOnset, earlyYoungAdultAnset,
intermediateYoungAdultOnset, lateYoungAdultOnset, middleAgeOnset, lateOnset);
intermediateYoungAdultOnset, lateYoungAdultOnset, middleAgeOnset, lateOnset);
private final static Set<TermId> youngAdultIds = Set.of(youngAdultOnset, earlyYoungAdultAnset, intermediateYoungAdultOnset, lateYoungAdultOnset);


public HpoOnsetAge(String id, String label) {
this.tid = TermId.of(id);
Expand Down Expand Up @@ -122,6 +108,7 @@ public boolean isCongenital() {
/**
* Reports whether this onset term is one of the adult-onset terms.
*
* @return {@code true} if {@code tid} is contained in {@code adultTermIds}, {@code false} otherwise
*/
@Override
public boolean isAdult() {
return adultTermIds.contains(tid);
}

public boolean isYoungAdult() {
return youngAdultIds.contains(tid);
Expand All @@ -134,7 +121,7 @@ public boolean isMiddleAge() {

@Override
public boolean isLateAdultAge() {
return tid.equals(lateAgeOnset);
return tid.equals(lateOnset);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ public Map<PhenopacketAge, List<OntologyTerm>> getPhenotypicFeaturesAtSpecifiedA
return phenotypicFeaturesAtSpecifiedAge;
}

public List<OntologyTerm> getPhenotypicFeaturesAtOnsetWithoutSpecifiedAge() {
/* public List<OntologyTerm> getPhenotypicFeaturesAtOnsetWithoutSpecifiedAge() {
return phenotypicFeaturesAtOnsetWithoutSpecifiedAge;
}
}*/
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,71 +9,122 @@ public interface PPKtBuildingBlockGenerator {

// days, months, years -- format singular and plural forms
String days(int d);

String months(int m);

String years(int y);


// Ages
String yearsOld(int y);

String monthsOld(int m);

String daysOld(int d);

String monthDayOld(int m, int d);

String yearsMonthsDaysOld(int y, int m, int d);

// Phrases
String asNewborn();

String atTheAgeOf();


// HPO Terms
String inFetalPeriod();
String isCongenital();
String asInfant();
String inChildhood();
String asAdolescent();
String inAdulthoold();

// sex/age
String she();

String he();

String theProband();

String woman();

String man();

String individual();

String theIndividual();

String girl();

String boy();

String child();

String adolescentGirl();

String adolescentBoy();

String adolescentChild();

String maleInfant();

String femaleInfant();

String infant();

String newbornBoy();

String newbornGirl();

String newborn();

String maleFetus();

String femaleFetus();

String fetus();

String female();

String male();

String adult();

// general
String probandWasA();

String whoPresented();

String presented();

String probandNoAgePresented();

String probandNoAgePresentedWith();
String probandMaleNoAgePresentedWith();
String probandFemaleNoAgePresentedWith();

String probandWasAMale();

String probandWasAFemale();

String probandWasAnIndividual();

String presentedWith();

String with();

String probandFemaleNoAgeExcludedOnly();
String probandMaleNoAgeExcludedOnly();
String probandNoAgeExcludedOnly();

String inWhomManifestationsWereExcluded();

// HPO Terms for onset and vignettes
String duringFetal();

String asNeonate();

String atBirth();

String asInfant();

String inChildhood();

String asAdolescent();

String asAdult();

String asYoungAdult();

String asMiddleAge();

String asLateOnset();
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ public interface PPKtIndividualInfoGenerator {

String heSheIndividual(PhenopacketSex psex);

String atAge(PhenopacketAge ppktAge);
/**
* Generate an age description intended for the vignettes for a specified age (i.e., not for the very first sentence).
* @param ppktAge the age to describe
* @return the age description as text
*/
String atAgeForVignette(PhenopacketAge ppktAge);

}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ public interface PhenopacketTextGenerator {



String QUERY_HEADER();
String GPT_PROMPT_HEADER();


}
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@ public interface PpktPhenotypicFeatureGenerator {
String formatFeatures(List<OntologyTerm> ontologyTerms);

default String featuresAtEncounter(List<OntologyTerm> ontologyTerms) {
return ""; //TODO
return ""; //TODO - implement and make this not default
}


/**
* Placeholder default that currently returns an empty string and ignores both arguments.
* NOTE(review): presumably intended to describe the features present at onset for the
* person denoted by {@code personString} -- confirm once an implementation exists.
*
* @param personString text referring to the individual (unused by this default)
* @param ontologyTerms the terms to report (unused by this default)
* @return an empty string until implementations override this method
*/
default String featuresAtOnset(String personString, List<OntologyTerm> ontologyTerms) {
return ""; //TODO - implement and make this not default
}

default List<String> getObservedFeaturesAsStr(List<OntologyTerm> oterms) {
return oterms.stream()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@ public interface PromptGenerator {

String getVignetteAtAge(PhenopacketAge page, PhenopacketSex psex, List<OntologyTerm> terms);

/**
* Placeholder default that currently returns an empty string and ignores {@code individual}.
* NOTE(review): presumably intended to produce the vignette text for the onset of disease --
* confirm against the implementing generators.
*
* @param individual the individual to describe (unused by this default)
* @return an empty string unless an implementation overrides this method
*/
default String getVignetteAtOnset(PpktIndividual individual){
return ""; // TODO: non-English generators need to implement this; then remove "default"
}




/**
* Static factory for the English prompt generator.
*
* @return a new {@code EnglishPromptGenerator}
*/
static PromptGenerator english(){
return new EnglishPromptGenerator();
}
Expand Down Expand Up @@ -62,12 +69,10 @@ static PromptGenerator italian(HpInternational international) {
*/
default String createPrompt(PpktIndividual individual) {
String individualInfo = getIndividualInformation(individual);
// For creating the prompt, we first report the onset and the unspecified terms together, and then
List<OntologyTerm> onsetTerms = individual.getPhenotypicFeaturesAtOnset();
List<OntologyTerm> unspecifiedAgeTerms = individual.getPhenotypicFeaturesAtOnsetWithoutSpecifiedAge();
Map<PhenopacketAge, List<OntologyTerm>> pfMap = individual.extractSpecifiedAgePhenotypicFeatures();
// For creating the prompt, we first report the onset and the unspecified terms together, and then
// report the rest
onsetTerms.addAll(unspecifiedAgeTerms);
// We then report the rest, one for each specified time
String onsetFeatures = formatFeatures(onsetTerms);
StringBuilder sb = new StringBuilder();
sb.append(queryHeader());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public DutchPromptGenerator(PpktPhenotypicFeatureGenerator pfgen) {

@Override
public String queryHeader() {
return ppktTextGenerator.QUERY_HEADER();
return ppktTextGenerator.GPT_PROMPT_HEADER();
}

@Override
Expand All @@ -45,7 +45,7 @@ public String formatFeatures(List<OntologyTerm> ontologyTerms) {

@Override
public String getVignetteAtAge(PhenopacketAge page, PhenopacketSex psex, List<OntologyTerm> terms) {
String ageString = this.ppktAgeSexGenerator.atAge(page);
String ageString = this.ppktAgeSexGenerator.atAgeForVignette(page);
String features = formatFeatures(terms);
return String.format("%s, %s presenteerde met %s", ageString, ppktAgeSexGenerator.heSheIndividual(psex), features);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,7 @@ public String heSheIndividual(PhenopacketSex psex) {
}

@Override
public String atAge(PhenopacketAge ppktAge) {
public String atAgeForVignette(PhenopacketAge ppktAge) {
if (ppktAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) {
return "Op de leeftijd van " + atIsoAgeExact(ppktAge);
} else if (ppktAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
public class PpktTextDutch implements PhenopacketTextGenerator {

@Override
public String QUERY_HEADER() {
public String GPT_PROMPT_HEADER() {
return """
Ik voer een experiment uit op basis van een klinisch casusrapport om te zien hoe jouw diagnoses zich verhouden tot die van menselijke experts. Ik ga je een deel van een medische casus geven. Je probeert geen patiënten te behandelen. In dit geval ben je “Dr. GPT-4”, een AI-taalmodel dat een diagnose stelt. Hier zijn enkele richtlijnen. Ten eerste bestaat er één definitieve diagnose, en het is een diagnose waarvan tegenwoordig bekend is dat deze ook bij mensen voorkomt. De diagnose wordt bijna altijd bevestigd door een soort genetische test, hoewel in zeldzame gevallen, wanneer een dergelijke test niet bestaat voor een diagnose, de diagnose in plaats daarvan kan worden gesteld op basis van gevalideerde klinische criteria of zeer zelden alleen maar kan worden bevestigd door de mening van deskundigen. Nadat je de casus hebt gelezen, wil ik dat je een differentiële diagnose geeft met een lijst met kandidaat-diagnoses, gerangschikt op waarschijnlijkheid, te beginnen met de meest waarschijnlijke kandidaat. Elke kandidaat moet worden gespecificeerd met de ziektenaam. Als de eerste kandidaat bijvoorbeeld het Branchio-oculofaciaal syndroom is en de tweede cystische fibrose, geef het dan zo in het Engels weer:
Expand Down
Loading

0 comments on commit 9a97842

Please sign in to comment.