From 0f0f5d9f0d22c49520069004d6a23b308b1557c7 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Mon, 29 Apr 2024 13:32:15 +0200 Subject: [PATCH 01/39] documentation --- mkdocs.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/mkdocs.yml b/mkdocs.yml index 1b7d9af..8aa20a5 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -35,6 +35,7 @@ nav: - "Template": 'languages.md' - "English": "english.md" - Setup: "setup.md" + - Batch: "batch.md" plugins: - search From bad00116573ff673cec4ca72394d0e5493bf579b Mon Sep 17 00:00:00 2001 From: "Leonardo local Kubuntu 22.04" Date: Mon, 29 Apr 2024 15:43:26 +0200 Subject: [PATCH 02/39] created dutch folder in implementation, added dutch to GPT translate --- .../cmd/GptTranslateCommand.java | 6 + .../impl/dutch/DutchPromptGenerator.java | 65 +++ .../impl/dutch/PpktIndividualDutch.java | 524 ++++++++++++++++++ .../dutch/PpktPhenotypicfeatureDutch.java | 90 +++ .../output/impl/dutch/PpktTextDutch.java | 21 + 5 files changed, 706 insertions(+) create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktPhenotypicfeatureDutch.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktTextDutch.java diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java index 68cd748..47521c3 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java @@ -58,10 +58,16 @@ public Integer call() throws Exception { PpktIndividual individual = new PpktIndividual(new File(ppkt)); String prompt = 
generator.createPrompt(individual); System.out.println(prompt); + // SPANISH System.out.println("SPANISH"); PromptGenerator spanish = PromptGenerator.spanish(hpo, internationalMap.get("es")); prompt = spanish.createPrompt(individual); System.out.println(prompt); + // DUTCH + System.out.println("DUTCH"); + PromptGenerator dutch = PromptGenerator.dutch(hpo, internationalMap.get("nl")); + prompt = dutch.createPrompt(individual); + System.out.println(prompt); return 0; } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java new file mode 100644 index 0000000..6ae13e4 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java @@ -0,0 +1,65 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.dutch; + +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketIndividualInformationGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketTextGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator; +import org.monarchinitiative.phenopacket2prompt.output.impl.dutch.PpktIndividualDutch; +import org.monarchinitiative.phenopacket2prompt.output.impl.dutch.PpktTextDutch; + +import java.util.List; + +public class DutchPromptGenerator implements PromptGenerator { + + private final Ontology hpo; + + + private final PhenopacketIndividualInformationGenerator ppktAgeSexGenerator; + 
+ private final PhenopacketTextGenerator ppktTextGenerator; + + private final PpktPhenotypicFeatureGenerator ppktPhenotypicFeatureGenerator; + + + + public DutchPromptGenerator(Ontology hpo, PpktPhenotypicFeatureGenerator pfgen) { + this.hpo = hpo; + ppktAgeSexGenerator = new PpktIndividualDutch(); + ppktTextGenerator = new PpktTextDutch(); + this.ppktPhenotypicFeatureGenerator = pfgen; + } + + @Override + public String queryHeader() { + return ppktTextGenerator.QUERY_HEADER(); + } + + @Override + public String getIndividualInformation(PpktIndividual ppktIndividual) { + return this.ppktAgeSexGenerator.getIndividualDescription(ppktIndividual); + } + + @Override + public String formatFeatures(List ontologyTerms) { + return ppktPhenotypicFeatureGenerator.formatFeatures(ontologyTerms); + } + + @Override + public String getVignetteAtAge(PhenopacketAge page, PhenopacketSex psex, List terms) { + String ageString = this.ppktAgeSexGenerator.atAge(page); + String features = formatFeatures(terms); + return String.format("%s, %s presentó %s", ageString, ppktAgeSexGenerator.heSheIndividual(psex), features); + } + + + + + + + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java new file mode 100644 index 0000000..53c44a4 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java @@ -0,0 +1,524 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.dutch; + +import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.monarchinitiative.phenopacket2prompt.model.*; +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketIndividualInformationGenerator; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +public class PpktIndividualDutch implements PhenopacketIndividualInformationGenerator { + + + 
/** + * Equivalent of "The clinical + * @param individual + * @return + */ + public String ageAndSexAtOnset(PpktIndividual individual) { + Optional ageOpt = individual.getAgeAtOnset(); + return ""; + } + + + + + public String ageAndSexAtLastExamination(PpktIndividual individual) { + PhenopacketSex psex = individual.getSex(); + Optional ageOpt = individual.getAgeAtLastExamination(); + if (ageOpt.isEmpty()) { + ageOpt = individual.getAgeAtOnset(); + } + String sex; + switch (psex) { + case FEMALE -> sex = "una paciente femenina"; + case MALE -> sex = "un paciente masculino"; + default -> sex = "una persona"; + }; + + if (ageOpt.isEmpty()) { + return sex; + } + PhenopacketAge age = ageOpt.get(); + if (age.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoage = (Iso8601Age) age; + int y = isoage.getYears(); + int m = isoage.getMonths(); + int d = isoage.getDays(); + if (psex.equals(PhenopacketSex.FEMALE)) { + if (y > 17) { + return String.format("una mujer de %d años", y); + } else if (y > 9) { + return String.format("una adolescente de %d años", y); + + } else if (y > 0) { + return String.format("una niña de %d años", y); + } else if (m>0) { + return String.format("una bebe niña de %d meses", m); + } else { + return String.format("una recien nacida %d meses", d); + } + } + } else { + // age is an HPO onset term, we do not have an exact date + } + if (age.isChild()) { + return switch (psex) { + case FEMALE -> "una niña"; + case MALE -> "un niño"; + default -> "un niño"; // difficult to be gender neutral + }; + } else if (age.isCongenital()) { + return switch (psex) { + case FEMALE -> "una recien nacida"; + case MALE -> "un recien nacido"; + default -> "un recien nacido"; + }; + } else if (age.isFetus()) { + return switch (psex) { + case FEMALE -> "un feto femenino"; + case MALE -> "un feto masculino"; + default -> "un feto"; + }; + } else if (age.isInfant()) { + return switch (psex) { + case FEMALE -> "un bebé femenino"; + case MALE -> "un bebé 
masculino"; + default -> "un bebé"; + }; + } else { + return switch (psex) { + case FEMALE -> "un mujer"; + case MALE -> "un hombre"; + default -> "una persona adulta"; + }; + } + } + + + private String individualName(PpktIndividual individual) { + PhenopacketSex psex = individual.getSex(); + Optional ageOpt = individual.getAgeAtLastExamination(); + if (ageOpt.isEmpty()) { + ageOpt = individual.getAgeAtOnset(); + } + if (ageOpt.isEmpty()) { + return switch (psex) { + case FEMALE -> "female"; + case MALE -> "male"; + default -> "individual"; + }; + } + PhenopacketAge age = ageOpt.get();; + if (age.isChild()) { + return switch (psex) { + case FEMALE -> "girl"; + case MALE -> "boy"; + default -> "child"; + }; + } else if (age.isCongenital()) { + return switch (psex) { + case FEMALE -> "female newborn"; + case MALE -> "male newborn"; + default -> "newborn"; + }; + } else if (age.isFetus()) { + return switch (psex) { + case FEMALE -> "female fetus"; + case MALE -> "male fetus"; + default -> "fetus"; + }; + } else if (age.isInfant()) { + return switch (psex) { + case FEMALE -> "female infant"; + case MALE -> "male infant"; + default -> "infant"; + }; + } else { + return switch (psex) { + case FEMALE -> "woman"; + case MALE -> "man"; + default -> "individual"; + }; + } + } + + + /* @Override + public String individualWithAge(PhenopacketAge ppktAge) { + if (ppktAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + return ppktAge.age() + " old"; + } else if (ppktAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + String label = ppktAge.age(); // something like "Infantile onset" + return switch (label) { + case "Infantile onset" -> "bebé"; + case "Childhood onset" -> "niño"; + case "Neonatal onset" -> "neonate"; + case "Congenital onset" -> "recién nacido"; + case "Adult onset" -> "adulto"; + default-> String.format("During the %s", label.replace(" onset", "")); + }; + } else { + return ""; // should never get here + } + } +*/ + + private String 
atIsoAgeExact(PhenopacketAge ppktAge) { + Iso8601Age iso8601Age = (Iso8601Age) ppktAge; + int y = iso8601Age.getYears(); + int m = iso8601Age.getMonths(); + int d = iso8601Age.getDays(); + + if (y > 10) { + return String.format("%d años", y); + } else if (y > 0) { + if (m > 1) { + return String.format("%d años y %d meses", y, m); + } else if (m == 1) { + return String.format("%d años y un mes", y); + } else { + return String.format("%d años", y); + } + } else if (m>0) { + return String.format("%d meses y %d días", m, d); + } else { + return String.format("%d días", d); + } + } + + + @Override + public String getIndividualDescription(PpktIndividual individual) { + Optional lastExamOpt = individual.getAgeAtLastExamination(); + Optional onsetOpt = individual.getAgeAtOnset(); + PhenopacketSex psex = individual.getSex(); + if (lastExamOpt.isPresent() && onsetOpt.isPresent()) { + return onsetAndLastEncounterAvailable(psex, lastExamOpt.get(), onsetOpt.get()); + } else if (lastExamOpt.isPresent()) { + return lastEncounterAvailable(psex, lastExamOpt.get()); + } else if (onsetOpt.isPresent()) { + return onsetAvailable(psex, onsetOpt.get()); + } else { + return ageNotAvailable(psex); + } + } + + + private String iso8601ToYearMonth(Iso8601Age iso8601Age) { + if (iso8601Age.getMonths() == 0) { + return String.format("de %d años", iso8601Age.getYears()); + } else { + return String.format("de %d años y %d meses", iso8601Age.getYears(), iso8601Age.getMonths()); + } + } + + private String iso8601ToMonthDay(Iso8601Age iso8601Age) { + int m = iso8601Age.getMonths(); + int d = iso8601Age.getDays(); + if (m == 0) { + return String.format("de %d dias", d); + } else if (d>0){ + return String.format("de %d meses y %d dias", m, d); + } else { + return String.format("de %d meses", m); + } + } + + /** + * Create a phrase such as "at the age of 7 years, 4 months, and 2 days" + * Leave out the months and days if they are zero. 
+ * @param isoAge + * @return + */ + private String iso8601AtAgeOf(Iso8601Age isoAge) { + List components = new ArrayList<>(); + + if (isoAge.getYears()>1) { + components.add(String.format("%d years", isoAge.getYears())); + } else if (isoAge.getYears() == 1) { + components.add("1 year"); + } + if (isoAge.getMonths() > 1) { + components.add(String.format("%d months", isoAge.getMonths())); + } else if (isoAge.getMonths() == 1) { + components.add("1 month"); + } + if (isoAge.getDays()>1) { + components.add(String.format("%d days", isoAge.getDays())); + } else if (isoAge.getDays()==1) { + components.add("1 day"); + } + if (components.isEmpty()) { + return "as a newborn"; + } else if (components.size() == 1) { + return "at the age of " + components.get(0); + } else if (components.size() == 2) { + return "at the age of " + components.get(0) + " and " + components.get(1); + } else { + return "at the age of " + components.get(0) + "m " + components.get(1) + + ", and " + components.get(2); + } + } + + private String onsetTermAtAgeOf(HpoOnsetAge hpoOnsetTermAge) { + if (hpoOnsetTermAge.isFetus()) { + return "en el periodo fetal"; + } else if (hpoOnsetTermAge.isCongenital()) { + return "en el periodo neonatal"; + } else if (hpoOnsetTermAge.isInfant()) { + return "como un bebe"; + } else if (hpoOnsetTermAge.isChild()) { + return "en la niñez"; + } else if (hpoOnsetTermAge.isJuvenile()) { + return "como adolescente"; + } else { + return "en la edad adulta"; + } + } + + + private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8601Age) { + int y = iso8601Age.getYears(); + int m = iso8601Age.getMonths(); + int d = iso8601Age.getDays(); + // if older + if (y>17) { + return switch (psex) { + case FEMALE -> String.format("mujer de %d años", y); + case MALE -> String.format("hombre de %d años", y); + default -> String.format("persona de %d años", y); + }; + } else if (y>9) { + return switch (psex) { + case FEMALE -> String.format("una adolescente de %d años", 
y); + case MALE -> String.format("un adolescente de %d años", y); + default -> String.format("un adolescente de %d años", y); + }; + } else if (y>0) { + return switch (psex) { + case FEMALE -> String.format("niña %s", iso8601ToYearMonth(iso8601Age)); + case MALE -> String.format("niño %s", iso8601ToYearMonth(iso8601Age)); + default -> String.format("niño %s", iso8601ToYearMonth(iso8601Age)); + }; + } else if (m>0 || d> 0) { + return switch (psex) { + case FEMALE -> String.format("una infante %s", iso8601ToMonthDay(iso8601Age)); + case MALE -> String.format("un infante %s", iso8601ToMonthDay(iso8601Age)); + default -> String.format("un infante %s", iso8601ToMonthDay(iso8601Age)); + }; + } else { + return switch (psex) { + case FEMALE -> "recien nacida girl"; + case MALE -> "recien nacido"; + default -> "recien nacido"; + }; + } + } + + private String hpoOnsetIndividualDescription(PhenopacketSex psex, HpoOnsetAge hpoOnsetTermAge) { + if (hpoOnsetTermAge.isFetus()) { + return switch (psex) { + case FEMALE -> "female fetus"; + case MALE -> "male fetus"; + default -> "fetus"; + }; + } else if (hpoOnsetTermAge.isCongenital()) { + return switch (psex) { + case FEMALE -> "female newborn"; + case MALE -> "male newborn"; + default -> "newborn"; + }; + } else if (hpoOnsetTermAge.isInfant()) { + return switch (psex) { + case FEMALE -> "female infant"; + case MALE -> "male infant"; + default -> "infant"; + }; + } else if (hpoOnsetTermAge.isChild()) { + return switch (psex) { + case FEMALE -> "girl"; + case MALE -> "boy"; + default -> "child"; + }; + } else if (hpoOnsetTermAge.isJuvenile()) { + return switch (psex) { + case FEMALE -> "female adolescent"; + case MALE -> "male adolescent"; + default -> "adolescent"; + }; + }else { + return switch (psex) { + case FEMALE -> "woman"; + case MALE -> "man"; + default -> "adult"; + }; + } + } + + /** + * A sentence such as The proband was a 39-year old woman who presented at the age of 12 years with + * HPO1, HPO2, and HPO3. 
HPO4 and HPO5 were excluded. This method returns the phrase that ends with "with" + * El sujeto era un niño de 1 año y 10 meses que se presentó como recién nacido con un filtrum largo. + * @param psex + * @param lastExamAge + * @param onsetAge + * @return + */ + private String onsetAndLastEncounterAvailable(PhenopacketSex psex, PhenopacketAge lastExamAge, PhenopacketAge onsetAge) { + String individualDescription; + String onsetDescription; + if (lastExamAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoAge = (Iso8601Age) lastExamAge; + individualDescription = iso8601individualDescription(psex, isoAge); + } else if (lastExamAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) lastExamAge; + individualDescription = hpoOnsetIndividualDescription(psex,hpoOnsetTermAge); + } else { + // should never happen + throw new PhenolRuntimeException("Did not recognize last exam age type " + lastExamAge.ageType()); + } + if (onsetAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoAge = (Iso8601Age) onsetAge; + onsetDescription = iso8601AtAgeOf(isoAge); + } else if (onsetAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) onsetAge; + onsetDescription = onsetTermAtAgeOf(hpoOnsetTermAge); + } else { + // should never happen + throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); + } + return String.format("El sujeto era %s que se presentó %s con", individualDescription, onsetDescription); + } + + + /** + * Age at last examination available but age of onset not available + * The proband was a 39-year old woman who presented with HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. 
+ * @param psex + * @param lastExamAge + */ + private String lastEncounterAvailable(PhenopacketSex psex, PhenopacketAge lastExamAge) { + String individualDescription; + if (lastExamAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoAge = (Iso8601Age) lastExamAge; + individualDescription = iso8601individualDescription(psex, isoAge); + } else if (lastExamAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) lastExamAge; + individualDescription = hpoOnsetIndividualDescription(psex,hpoOnsetTermAge); + } else { + // should never happen + throw new PhenolRuntimeException("Did not recognize last exam age type " + lastExamAge.ageType()); + } + return String.format("The proband was a %s who presented with", individualDescription); + } + + /** + * Age at last examination not available but age of onset available + * The proband presented at the age of 12 years with HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. + * @param psex + * @param onsetAge + * @return + */ + private String onsetAvailable(PhenopacketSex psex, PhenopacketAge onsetAge) { + String onsetDescription; + if (onsetAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoAge = (Iso8601Age) onsetAge; + onsetDescription = iso8601AtAgeOf(isoAge); + } else if (onsetAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) onsetAge; + onsetDescription = onsetTermAtAgeOf(hpoOnsetTermAge); + } else { + // should never happen + throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); + } + return String.format("The proband presented %s with", onsetDescription, onsetDescription); + } + + private String ageNotAvailable(PhenopacketSex psex) { + return switch (psex) { + case FEMALE -> "The proband was a female who presented with"; + case MALE -> "The proband was a male who presented with"; + default -> "The proband presented with"; + }; 
+ } + + @Override + public String heSheIndividual(PhenopacketSex psex) { + return switch (psex) { + case FEMALE -> "el"; + case MALE -> "ella"; + default -> "la persona"; + }; + } + + @Override + public String atAge(PhenopacketAge ppktAge) { + if (ppktAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + return "A la edad de " + atIsoAgeExact(ppktAge); + } else if (ppktAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + String label = ppktAge.age(); // something like "Infantile onset" + return switch (label) { + case "Infantile onset" -> "Durante el periodo infantil"; + case "Childhood onset" -> "Durante la infancia"; + case "Neonatal onset" -> "Durante el periodo neonatal"; + case "Congenital onset" -> "Al nacer"; + case "Adult onset" -> "Como adulto"; + default-> String.format("Durante el %s periodo", label.replace(" onset", "")); + }; + } else { + return ""; // should never get here + } + } + + // @Override + public String ppktSex(PpktIndividual individual) { + PhenopacketSex psex = individual.getSex(); + Optional ageOpt = individual.getAgeAtLastExamination(); + if (ageOpt.isEmpty()) { + ageOpt = individual.getAgeAtOnset(); + } + if (ageOpt.isEmpty()) { + return switch (psex) { + case FEMALE -> "female"; + case MALE -> "male"; + default -> "individual"; + }; + } + PhenopacketAge age = ageOpt.get();; + if (age.isChild()) { + return switch (psex) { + case FEMALE -> "girl"; + case MALE -> "boy"; + default -> "child"; + }; + } else if (age.isCongenital()) { + return switch (psex) { + case FEMALE -> "female newborn"; + case MALE -> "male newborn"; + default -> "newborn"; + }; + } else if (age.isFetus()) { + return switch (psex) { + case FEMALE -> "female fetus"; + case MALE -> "male fetus"; + default -> "fetus"; + }; + } else if (age.isInfant()) { + return switch (psex) { + case FEMALE -> "female infant"; + case MALE -> "male infant"; + default -> "infant"; + }; + } else { + return switch (psex) { + case FEMALE -> "woman"; + case MALE -> "man"; 
+ default -> "individual"; + }; + } + } + + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktPhenotypicfeatureDutch.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktPhenotypicfeatureDutch.java new file mode 100644 index 0000000..ab07f01 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktPhenotypicfeatureDutch.java @@ -0,0 +1,90 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.dutch; + +import org.monarchinitiative.phenopacket2prompt.international.HpInternational; +import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; +import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.function.Predicate; + +public class PpktPhenotypicfeatureDutch implements PpktPhenotypicFeatureGenerator { + + private final HpInternational dutch; + + public PpktPhenotypicfeatureDutch(HpInternational international) { + dutch = international; + } + + + + private List getTranslations(List ontologyTerms) { + List labels = new ArrayList<>(); + for (var term: ontologyTerms) { + Optional opt = dutch.getLabel(term.getTid()); + if (opt.isPresent()) { + labels.add(opt.get()); + } else { + System.err.printf("[ERROR] Could not find %s translation for %s (%s).\n", dutch.getLanguageAcronym(), term.getLabel(), term.getTid().getValue()); + } + } + return labels; + } + + + private final Set vowels = Set.of('A', 'E', 'I', 'O', 'U', 'Y'); + + private String getOxfordCommaList(List items) { + if (items.size() == 1) { + return items.get(0); + } + if (items.size() == 2) { + // no comma if we just have two items. 
+ // one item will work with the below code + return String.join(" and ", items); + } + String symList = String.join(", ", items); + int jj = symList.lastIndexOf(", "); + if (jj > 0) { + String end = symList.substring(jj+2); + if (vowels.contains(end.charAt(0))) { + symList = symList.substring(0, jj) + " i " + end; + } else { + symList = symList.substring(0, jj) + " y " + end; + } + } + return symList; + } + + @Override + public String formatFeatures(List ontologyTerms) { + List observedTerms = ontologyTerms.stream() + .filter(Predicate.not(OntologyTerm::isExcluded)) + .toList(); + List observedLabels = getTranslations(observedTerms); + List excludedTerms = ontologyTerms.stream() + .filter(OntologyTerm::isExcluded).toList(); + List excludedLabels = getTranslations(excludedTerms); + if (observedLabels.isEmpty() && excludedLabels.isEmpty()) { + return "no phenotypic abnormalities"; // should never happen, actually! + } else if (excludedLabels.isEmpty()) { + return getOxfordCommaList(observedLabels) + ". 
"; + } else if (observedLabels.isEmpty()) { + if (excludedLabels.size() > 1) { + return String.format("por lo que se excluyeron %s.", getOxfordCommaList(excludedLabels)); + } else { + return String.format("por lo que %s fue excluido.",excludedLabels.get(0)); + } + } else { + String exclusion; + if (excludedLabels.size() == 1) { + exclusion = String.format(" y se excluyó %s.", getOxfordCommaList(excludedLabels)); + } else { + exclusion = String.format(" y se excluyeron %s.", getOxfordCommaList(excludedLabels)); + } + return getOxfordCommaList(observedLabels) + exclusion; + } + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktTextDutch.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktTextDutch.java new file mode 100644 index 0000000..34c88a7 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktTextDutch.java @@ -0,0 +1,21 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.dutch; + +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketTextGenerator; + +public class PpktTextDutch implements PhenopacketTextGenerator { + + @Override + public String QUERY_HEADER() { + return """ +Ik doe een experiment met een klinisch verslag om te zien hoe jullie diagnoses zich verhouden tot die van menselijke experts. Ik geef je een deel van een medisch geval. Je probeert geen patiënten te behandelen. In dit geval ben je "Dr. GPT-4", een AI-taalmodel dat een diagnose stelt. Hier zijn enkele richtlijnen. Ten eerste is er één definitieve diagnose en dat is een diagnose waarvan bekend is dat die bij mensen bestaat. De diagnose wordt bijna altijd bevestigd door een of andere genetische test, maar in zeldzame gevallen waarin zo'n test niet bestaat voor een diagnose, kan de diagnose in plaats daarvan worden gesteld op basis van gevalideerde klinische criteria of heel zelden gewoon worden bevestigd door de mening van een expert. 
Nadat je de casus hebt gelezen, wil ik dat je een differentiaaldiagnose stelt met een lijst van mogelijke diagnoses gerangschikt naar waarschijnlijkheid, te beginnen met de meest waarschijnlijke kandidaat. Elke kandidaat moet gespecificeerd worden met de OMIM identifier en de naam van de ziekte. Bijvoorbeeld, als de eerste kandidaat het Branchiooculofaciaal syndroom is en de tweede Cystic fibrosis, geef dan dit: + +1. OMIM:113620 - Branchiooculofaciaal syndroom +2. OMIM:219700 - Taaislijmziekte + +Deze lijst moet zoveel diagnoses bevatten als je redelijk acht. + +Je hoeft je redenering niet uit te leggen, je hoeft alleen de diagnoses samen met de OMIM-identifiers op te sommen. Dit is het geval: +"""; + } + +} From 2e98f660be9b25475a4c93effb2de00b11cda017 Mon Sep 17 00:00:00 2001 From: "Leonardo local Kubuntu 22.04" Date: Tue, 30 Apr 2024 16:09:59 +0200 Subject: [PATCH 03/39] added code infrastructure for dutch, actual translations still to be done --- .../phenopacket2prompt/cmd/GbtTranslateBatchCommand.java | 2 ++ .../phenopacket2prompt/output/PromptGenerator.java | 5 +++++ .../output/impl/dutch/DutchPromptGenerator.java | 2 +- .../output/impl/dutch/PpktIndividualDutch.java | 6 +++--- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java index c70d93d..7d633db 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java @@ -65,6 +65,8 @@ public Integer call() throws Exception { // output all non-English languages here PromptGenerator spanish = PromptGenerator.spanish(hpo, internationalMap.get("es")); outputPromptsInternational(ppktFiles, hpo, "es", spanish); + PromptGenerator dutch = PromptGenerator.dutch(hpo, internationalMap.get("nl")); + 
outputPromptsInternational(ppktFiles, hpo, "nl", dutch); // output file with correct diagnosis list outputCorrectResults(correctResultList); return 0; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java index 6603b38..98e380d 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java @@ -8,6 +8,7 @@ import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; import org.monarchinitiative.phenopacket2prompt.output.impl.english.EnglishPromptGenerator; import org.monarchinitiative.phenopacket2prompt.output.impl.spanish.*; +import org.monarchinitiative.phenopacket2prompt.output.impl.dutch.*; import java.util.List; import java.util.Map; @@ -33,6 +34,10 @@ static PromptGenerator spanish(Ontology hpo, HpInternational international) { return new SpanishPromptGenerator(hpo, pfgen); } + static PromptGenerator dutch(Ontology hpo, HpInternational international) { + PpktPhenotypicFeatureGenerator pfgen = new PpktPhenotypicfeatureDutch(international); + return new DutchPromptGenerator(hpo, pfgen); + } /** * The following structure should work for most other languages, but the function * can be overridden if necessary. 
diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java index 6ae13e4..f7f1d19 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java @@ -53,7 +53,7 @@ public String formatFeatures(List ontologyTerms) { public String getVignetteAtAge(PhenopacketAge page, PhenopacketSex psex, List terms) { String ageString = this.ppktAgeSexGenerator.atAge(page); String features = formatFeatures(terms); - return String.format("%s, %s presentó %s", ageString, ppktAgeSexGenerator.heSheIndividual(psex), features); + return String.format("%s, %s dutch words here %s", ageString, ppktAgeSexGenerator.heSheIndividual(psex), features); } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java index 53c44a4..c8580d8 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java @@ -32,9 +32,9 @@ public String ageAndSexAtLastExamination(PpktIndividual individual) { } String sex; switch (psex) { - case FEMALE -> sex = "una paciente femenina"; - case MALE -> sex = "un paciente masculino"; - default -> sex = "una persona"; + case FEMALE -> sex = "a dutch female"; + case MALE -> sex = "a dutch male"; + default -> sex = "a dutch person"; }; if (ageOpt.isEmpty()) { From fbb66873b20ceeb0a1ab648ac5eecd7d5473aebf Mon Sep 17 00:00:00 2001 From: Kyran Wissink Date: Thu, 2 May 2024 12:10:28 +0200 Subject: [PATCH 04/39] Translated everything to Dutch. Should run and test if syntax is correct. 
--- .../impl/dutch/DutchPromptGenerator.java | 2 +- .../impl/dutch/PpktIndividualDutch.java | 265 +++++++++--------- .../dutch/PpktPhenotypicfeatureDutch.java | 10 +- 3 files changed, 138 insertions(+), 139 deletions(-) diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java index f7f1d19..239495e 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java @@ -53,7 +53,7 @@ public String formatFeatures(List ontologyTerms) { public String getVignetteAtAge(PhenopacketAge page, PhenopacketSex psex, List terms) { String ageString = this.ppktAgeSexGenerator.atAge(page); String features = formatFeatures(terms); - return String.format("%s, %s dutch words here %s", ageString, ppktAgeSexGenerator.heSheIndividual(psex), features); + return String.format("%s, %s presenteerde met %s", ageString, ppktAgeSexGenerator.heSheIndividual(psex), features); } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java index c8580d8..2d7e38b 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java @@ -32,9 +32,9 @@ public String ageAndSexAtLastExamination(PpktIndividual individual) { } String sex; switch (psex) { - case FEMALE -> sex = "a dutch female"; - case MALE -> sex = "a dutch male"; - default -> sex = "a dutch person"; + case FEMALE -> sex = "zij"; + case MALE -> sex = "hij"; + default -> sex = "de persoon"; }; if (ageOpt.isEmpty()) { @@ -48,16 +48,15 @@ public String 
ageAndSexAtLastExamination(PpktIndividual individual) { int d = isoage.getDays(); if (psex.equals(PhenopacketSex.FEMALE)) { if (y > 17) { - return String.format("una mujer de %d años", y); + return String.format("een vrouw van %d jaar oud", y); } else if (y > 9) { - return String.format("una adolescente de %d años", y); - + return String.format("een adolescente vrouw van %d jaar oud", y); } else if (y > 0) { - return String.format("una niña de %d años", y); + return String.format("een meisje van %d jaar oud", y); } else if (m>0) { - return String.format("una bebe niña de %d meses", m); + return String.format("een baby van %d maanden oud", m); } else { - return String.format("una recien nacida %d meses", d); + return String.format("een pasgeboren vrouwelijke baby van %d dagen oud", d); } } } else { @@ -65,33 +64,33 @@ public String ageAndSexAtLastExamination(PpktIndividual individual) { } if (age.isChild()) { return switch (psex) { - case FEMALE -> "una niña"; - case MALE -> "un niño"; - default -> "un niño"; // difficult to be gender neutral + case FEMALE -> "een meisje"; + case MALE -> "een jongetje"; + default -> "een kind"; // difficult to be gender neutral }; } else if (age.isCongenital()) { return switch (psex) { - case FEMALE -> "una recien nacida"; - case MALE -> "un recien nacido"; - default -> "un recien nacido"; + case FEMALE -> "een pasgeboren meisje"; + case MALE -> "een pasgeboren jongetje"; + default -> "een pasgeborene"; }; } else if (age.isFetus()) { return switch (psex) { - case FEMALE -> "un feto femenino"; - case MALE -> "un feto masculino"; - default -> "un feto"; + case FEMALE -> "een vrouwelijke foetus"; + case MALE -> "een mannelijke foetus"; + default -> "een foetus"; }; } else if (age.isInfant()) { return switch (psex) { - case FEMALE -> "un bebé femenino"; - case MALE -> "un bebé masculino"; - default -> "un bebé"; + case FEMALE -> "een vrouwelijke baby"; + case MALE -> "een mannelijke baby"; + default -> "een baby"; }; } else { return 
switch (psex) { - case FEMALE -> "un mujer"; - case MALE -> "un hombre"; - default -> "una persona adulta"; + case FEMALE -> "een vrouw"; + case MALE -> "een man"; + default -> "een volwassene"; }; } } @@ -105,41 +104,41 @@ private String individualName(PpktIndividual individual) { } if (ageOpt.isEmpty()) { return switch (psex) { - case FEMALE -> "female"; - case MALE -> "male"; - default -> "individual"; + case FEMALE -> "vrouw"; + case MALE -> "man"; + default -> "individu"; }; } PhenopacketAge age = ageOpt.get();; if (age.isChild()) { return switch (psex) { - case FEMALE -> "girl"; - case MALE -> "boy"; - default -> "child"; + case FEMALE -> "meisje"; + case MALE -> "jongetje"; + default -> "kind"; }; } else if (age.isCongenital()) { return switch (psex) { - case FEMALE -> "female newborn"; - case MALE -> "male newborn"; - default -> "newborn"; + case FEMALE -> "pasgeboren vrouwelijke baby"; + case MALE -> "pasgeboren mannelijke baby"; + default -> "pasgeborene"; }; } else if (age.isFetus()) { return switch (psex) { - case FEMALE -> "female fetus"; - case MALE -> "male fetus"; - default -> "fetus"; + case FEMALE -> "vrouwelijke foetus"; + case MALE -> "mannelijke foetus"; + default -> "foetus"; }; } else if (age.isInfant()) { return switch (psex) { - case FEMALE -> "female infant"; - case MALE -> "male infant"; - default -> "infant"; + case FEMALE -> "vrouwelijke baby"; + case MALE -> "mannelijke baby"; + default -> "baby"; }; } else { return switch (psex) { - case FEMALE -> "woman"; + case FEMALE -> "vrouw"; case MALE -> "man"; - default -> "individual"; + default -> "individu"; }; } } @@ -172,19 +171,19 @@ private String atIsoAgeExact(PhenopacketAge ppktAge) { int d = iso8601Age.getDays(); if (y > 10) { - return String.format("%d años", y); + return String.format("%d jaar oud", y); } else if (y > 0) { if (m > 1) { - return String.format("%d años y %d meses", y, m); + return String.format("%d jaar en %d maanden oud", y, m); } else if (m == 1) { - return 
String.format("%d años y un mes", y); + return String.format("%d jaar en één maand oud", y); } else { - return String.format("%d años", y); + return String.format("%d jaar oud", y); } } else if (m>0) { - return String.format("%d meses y %d días", m, d); + return String.format("%d maanden en %d dagen oud", m, d); } else { - return String.format("%d días", d); + return String.format("%d dagen oud", d); } } @@ -208,9 +207,9 @@ public String getIndividualDescription(PpktIndividual individual) { private String iso8601ToYearMonth(Iso8601Age iso8601Age) { if (iso8601Age.getMonths() == 0) { - return String.format("de %d años", iso8601Age.getYears()); + return String.format("van %d jaar oud", iso8601Age.getYears()); } else { - return String.format("de %d años y %d meses", iso8601Age.getYears(), iso8601Age.getMonths()); + return String.format("van %d jaar en %d maanden", iso8601Age.getYears(), iso8601Age.getMonths()); } } @@ -218,11 +217,11 @@ private String iso8601ToMonthDay(Iso8601Age iso8601Age) { int m = iso8601Age.getMonths(); int d = iso8601Age.getDays(); if (m == 0) { - return String.format("de %d dias", d); + return String.format("van %d dagen oud", d); } else if (d>0){ - return String.format("de %d meses y %d dias", m, d); + return String.format("van %d maanden en %d dagen oud", m, d); } else { - return String.format("de %d meses", m); + return String.format("van %d maanden oud", m); } } @@ -236,45 +235,45 @@ private String iso8601AtAgeOf(Iso8601Age isoAge) { List components = new ArrayList<>(); if (isoAge.getYears()>1) { - components.add(String.format("%d years", isoAge.getYears())); + components.add(String.format("%d jaren oud", isoAge.getYears())); } else if (isoAge.getYears() == 1) { - components.add("1 year"); + components.add("één jaar oud"); } if (isoAge.getMonths() > 1) { - components.add(String.format("%d months", isoAge.getMonths())); + components.add(String.format("%d maanden oud", isoAge.getMonths())); } else if (isoAge.getMonths() == 1) { - 
components.add("1 month"); + components.add("één maand oud"); } if (isoAge.getDays()>1) { - components.add(String.format("%d days", isoAge.getDays())); + components.add(String.format("%d dagen oud", isoAge.getDays())); } else if (isoAge.getDays()==1) { - components.add("1 day"); + components.add("één dag oud"); } if (components.isEmpty()) { - return "as a newborn"; + return "als pasgeborene"; } else if (components.size() == 1) { - return "at the age of " + components.get(0); + return "op de leeftijd van " + components.get(0); } else if (components.size() == 2) { - return "at the age of " + components.get(0) + " and " + components.get(1); + return "op de leeftijd van " + components.get(0) + " en " + components.get(1); } else { - return "at the age of " + components.get(0) + "m " + components.get(1) + - ", and " + components.get(2); + return "op de leeftijd van " + components.get(0) + " " + components.get(1) + + ", en " + components.get(2); } } private String onsetTermAtAgeOf(HpoOnsetAge hpoOnsetTermAge) { if (hpoOnsetTermAge.isFetus()) { - return "en el periodo fetal"; + return "in de foetale periode"; } else if (hpoOnsetTermAge.isCongenital()) { - return "en el periodo neonatal"; + return "in de neonatale periode"; } else if (hpoOnsetTermAge.isInfant()) { - return "como un bebe"; + return "als baby"; } else if (hpoOnsetTermAge.isChild()) { - return "en la niñez"; + return "als kind"; } else if (hpoOnsetTermAge.isJuvenile()) { - return "como adolescente"; + return "als adolescent"; } else { - return "en la edad adulta"; + return "als volwassene"; } } @@ -286,33 +285,33 @@ private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8 // if older if (y>17) { return switch (psex) { - case FEMALE -> String.format("mujer de %d años", y); - case MALE -> String.format("hombre de %d años", y); - default -> String.format("persona de %d años", y); + case FEMALE -> String.format("een vrouw van %d jaar oud", y); + case MALE -> String.format("een man van %d 
jaar oud", y); + default -> String.format("een persoon van %d jaar oud", y); }; } else if (y>9) { return switch (psex) { - case FEMALE -> String.format("una adolescente de %d años", y); - case MALE -> String.format("un adolescente de %d años", y); - default -> String.format("un adolescente de %d años", y); + case FEMALE -> String.format("een vrouwelijke adolescent van %d jaar oud", y); + case MALE -> String.format("een mannelijke adolescent van %d jaar oud", y); + default -> String.format("een adolescent van %d jaar oud", y); }; } else if (y>0) { return switch (psex) { - case FEMALE -> String.format("niña %s", iso8601ToYearMonth(iso8601Age)); - case MALE -> String.format("niño %s", iso8601ToYearMonth(iso8601Age)); - default -> String.format("niño %s", iso8601ToYearMonth(iso8601Age)); + case FEMALE -> String.format("meisje %s", iso8601ToYearMonth(iso8601Age)); + case MALE -> String.format("jongetje %s", iso8601ToYearMonth(iso8601Age)); + default -> String.format("kind %s", iso8601ToYearMonth(iso8601Age)); }; } else if (m>0 || d> 0) { return switch (psex) { - case FEMALE -> String.format("una infante %s", iso8601ToMonthDay(iso8601Age)); - case MALE -> String.format("un infante %s", iso8601ToMonthDay(iso8601Age)); - default -> String.format("un infante %s", iso8601ToMonthDay(iso8601Age)); + case FEMALE -> String.format("een vrouwelijke baby %s", iso8601ToMonthDay(iso8601Age)); + case MALE -> String.format("een mannelijke baby %s", iso8601ToMonthDay(iso8601Age)); + default -> String.format("een baby %s", iso8601ToMonthDay(iso8601Age)); }; } else { return switch (psex) { - case FEMALE -> "recien nacida girl"; - case MALE -> "recien nacido"; - default -> "recien nacido"; + case FEMALE -> "een pasgeboren meisje"; + case MALE -> "een pasgeboren jongetje"; + default -> "een pasgeborene"; }; } } @@ -320,39 +319,39 @@ private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8 private String hpoOnsetIndividualDescription(PhenopacketSex psex, HpoOnsetAge 
hpoOnsetTermAge) { if (hpoOnsetTermAge.isFetus()) { return switch (psex) { - case FEMALE -> "female fetus"; - case MALE -> "male fetus"; - default -> "fetus"; + case FEMALE -> "vrouwelijke foetus"; + case MALE -> "mannelijke foetus"; + default -> "foetus"; }; } else if (hpoOnsetTermAge.isCongenital()) { return switch (psex) { - case FEMALE -> "female newborn"; - case MALE -> "male newborn"; - default -> "newborn"; + case FEMALE -> "pasgeboren meisje"; + case MALE -> "pasgeboren jongetje"; + default -> "pasgeborene"; }; } else if (hpoOnsetTermAge.isInfant()) { return switch (psex) { - case FEMALE -> "female infant"; - case MALE -> "male infant"; - default -> "infant"; + case FEMALE -> "vrouwelijke baby"; + case MALE -> "mannelijke baby"; + default -> "baby"; }; } else if (hpoOnsetTermAge.isChild()) { return switch (psex) { - case FEMALE -> "girl"; - case MALE -> "boy"; - default -> "child"; + case FEMALE -> "meisje"; + case MALE -> "jongetje"; + default -> "kind"; }; } else if (hpoOnsetTermAge.isJuvenile()) { return switch (psex) { - case FEMALE -> "female adolescent"; - case MALE -> "male adolescent"; + case FEMALE -> "vrouwelijke adolescent"; + case MALE -> "mannelijke adolescent"; default -> "adolescent"; }; }else { return switch (psex) { - case FEMALE -> "woman"; + case FEMALE -> "vrouw"; case MALE -> "man"; - default -> "adult"; + default -> "volwassene"; }; } } @@ -389,7 +388,7 @@ private String onsetAndLastEncounterAvailable(PhenopacketSex psex, PhenopacketAg // should never happen throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); } - return String.format("El sujeto era %s que se presentó %s con", individualDescription, onsetDescription); + return String.format("De proband was een %s die presenteerde met %s ", individualDescription, onsetDescription); } @@ -411,7 +410,7 @@ private String lastEncounterAvailable(PhenopacketSex psex, PhenopacketAge lastEx // should never happen throw new PhenolRuntimeException("Did not 
recognize last exam age type " + lastExamAge.ageType()); } - return String.format("The proband was a %s who presented with", individualDescription); + return String.format("De proband was een %s die presenteerde met ", individualDescription); } /** @@ -433,39 +432,39 @@ private String onsetAvailable(PhenopacketSex psex, PhenopacketAge onsetAge) { // should never happen throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); } - return String.format("The proband presented %s with", onsetDescription, onsetDescription); + return String.format("De proband presenteerde met %s", onsetDescription, onsetDescription); } private String ageNotAvailable(PhenopacketSex psex) { return switch (psex) { - case FEMALE -> "The proband was a female who presented with"; - case MALE -> "The proband was a male who presented with"; - default -> "The proband presented with"; + case FEMALE -> "De proband was een vrouw die presenteerde met"; + case MALE -> "De proband was een man die presenteerde met"; + default -> "De proband presenteerde met"; }; } @Override public String heSheIndividual(PhenopacketSex psex) { return switch (psex) { - case FEMALE -> "el"; - case MALE -> "ella"; - default -> "la persona"; + case FEMALE -> "zij"; + case MALE -> "hij"; + default -> "de persoon"; }; } @Override public String atAge(PhenopacketAge ppktAge) { if (ppktAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { - return "A la edad de " + atIsoAgeExact(ppktAge); + return "Op de leeftijd van " + atIsoAgeExact(ppktAge); } else if (ppktAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { String label = ppktAge.age(); // something like "Infantile onset" return switch (label) { - case "Infantile onset" -> "Durante el periodo infantil"; - case "Childhood onset" -> "Durante la infancia"; - case "Neonatal onset" -> "Durante el periodo neonatal"; - case "Congenital onset" -> "Al nacer"; - case "Adult onset" -> "Como adulto"; - default-> String.format("Durante el 
%s periodo", label.replace(" onset", "")); + case "Infantile onset" -> "Tijdens de infantiele periode"; + case "Childhood onset" -> "Tijdens de jeugd"; + case "Neonatal onset" -> "Tijdens de neonatale periode"; + case "Congenital onset" -> "Bij geboorte"; + case "Adult onset" -> "Op volwassen leeftijd"; + default-> String.format("Tijdens de %s periode", label.replace(" onset", "")); }; } else { return ""; // should never get here @@ -481,41 +480,41 @@ public String ppktSex(PpktIndividual individual) { } if (ageOpt.isEmpty()) { return switch (psex) { - case FEMALE -> "female"; - case MALE -> "male"; - default -> "individual"; + case FEMALE -> "vrouw"; + case MALE -> "man"; + default -> "individu"; }; } PhenopacketAge age = ageOpt.get();; if (age.isChild()) { return switch (psex) { - case FEMALE -> "girl"; - case MALE -> "boy"; - default -> "child"; + case FEMALE -> "meisje"; + case MALE -> "jongetje"; + default -> "kind"; }; } else if (age.isCongenital()) { return switch (psex) { - case FEMALE -> "female newborn"; - case MALE -> "male newborn"; - default -> "newborn"; + case FEMALE -> "vrouwelijke pasgeborene"; + case MALE -> "mannelijke pasgeborene"; + default -> "pasgeborene"; }; } else if (age.isFetus()) { return switch (psex) { - case FEMALE -> "female fetus"; - case MALE -> "male fetus"; - default -> "fetus"; + case FEMALE -> "vrouwelijke foetus"; + case MALE -> "mannelijke foetus"; + default -> "foetus"; }; } else if (age.isInfant()) { return switch (psex) { - case FEMALE -> "female infant"; - case MALE -> "male infant"; - default -> "infant"; + case FEMALE -> "vrouwelijke baby"; + case MALE -> "mannelijke baby"; + default -> "baby"; }; } else { return switch (psex) { - case FEMALE -> "woman"; + case FEMALE -> "vrouw"; case MALE -> "man"; - default -> "individual"; + default -> "individu"; }; } } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktPhenotypicfeatureDutch.java 
b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktPhenotypicfeatureDutch.java index ab07f01..a574216 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktPhenotypicfeatureDutch.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktPhenotypicfeatureDutch.java @@ -43,7 +43,7 @@ private String getOxfordCommaList(List items) { if (items.size() == 2) { // no comma if we just have two items. // one item will work with the below code - return String.join(" and ", items); + return String.join(" en ", items); } String symList = String.join(", ", items); int jj = symList.lastIndexOf(", "); @@ -73,16 +73,16 @@ public String formatFeatures(List ontologyTerms) { return getOxfordCommaList(observedLabels) + ". "; } else if (observedLabels.isEmpty()) { if (excludedLabels.size() > 1) { - return String.format("por lo que se excluyeron %s.", getOxfordCommaList(excludedLabels)); + return String.format("dus %s zijn uitgesloten.", getOxfordCommaList(excludedLabels)); } else { - return String.format("por lo que %s fue excluido.",excludedLabels.get(0)); + return String.format("Dus %s werd uitgesloten.",excludedLabels.get(0)); } } else { String exclusion; if (excludedLabels.size() == 1) { - exclusion = String.format(" y se excluyó %s.", getOxfordCommaList(excludedLabels)); + exclusion = String.format("en %s werd uitgesloten.", getOxfordCommaList(excludedLabels)); } else { - exclusion = String.format(" y se excluyeron %s.", getOxfordCommaList(excludedLabels)); + exclusion = String.format("en %s zijn uitgesloten.", getOxfordCommaList(excludedLabels)); } return getOxfordCommaList(observedLabels) + exclusion; } From b378797ae530015bd110aeb083fc2b3bf860c358 Mon Sep 17 00:00:00 2001 From: Kyran Wissink Date: Thu, 2 May 2024 14:51:59 +0200 Subject: [PATCH 05/39] Translated to Dutch with correct syntax --- .../output/impl/dutch/PpktIndividualDutch.java | 16 ++++++++-------- 
.../impl/dutch/PpktPhenotypicfeatureDutch.java | 8 ++++---- .../output/impl/dutch/PpktTextDutch.java | 4 ++-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java index 2d7e38b..ae95387 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java @@ -235,19 +235,19 @@ private String iso8601AtAgeOf(Iso8601Age isoAge) { List components = new ArrayList<>(); if (isoAge.getYears()>1) { - components.add(String.format("%d jaren oud", isoAge.getYears())); + components.add(String.format("%d jaar", isoAge.getYears())); } else if (isoAge.getYears() == 1) { components.add("één jaar oud"); } if (isoAge.getMonths() > 1) { - components.add(String.format("%d maanden oud", isoAge.getMonths())); + components.add(String.format("%d maanden", isoAge.getMonths())); } else if (isoAge.getMonths() == 1) { components.add("één maand oud"); } if (isoAge.getDays()>1) { - components.add(String.format("%d dagen oud", isoAge.getDays())); + components.add(String.format("%d dagen", isoAge.getDays())); } else if (isoAge.getDays()==1) { - components.add("één dag oud"); + components.add("één dag"); } if (components.isEmpty()) { return "als pasgeborene"; @@ -256,7 +256,7 @@ private String iso8601AtAgeOf(Iso8601Age isoAge) { } else if (components.size() == 2) { return "op de leeftijd van " + components.get(0) + " en " + components.get(1); } else { - return "op de leeftijd van " + components.get(0) + " " + components.get(1) + + return "op de leeftijd van " + components.get(0) + ". 
" + components.get(1) + ", en " + components.get(2); } } @@ -388,7 +388,7 @@ private String onsetAndLastEncounterAvailable(PhenopacketSex psex, PhenopacketAg // should never happen throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); } - return String.format("De proband was een %s die presenteerde met %s ", individualDescription, onsetDescription); + return String.format("De proband was een %s die %s presenteerde met", individualDescription, onsetDescription); } @@ -410,7 +410,7 @@ private String lastEncounterAvailable(PhenopacketSex psex, PhenopacketAge lastEx // should never happen throw new PhenolRuntimeException("Did not recognize last exam age type " + lastExamAge.ageType()); } - return String.format("De proband was een %s die presenteerde met ", individualDescription); + return String.format("De proband was een %s die presenteerde met", individualDescription); } /** @@ -432,7 +432,7 @@ private String onsetAvailable(PhenopacketSex psex, PhenopacketAge onsetAge) { // should never happen throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); } - return String.format("De proband presenteerde met %s", onsetDescription, onsetDescription); + return String.format("De proband presenteerde %s met", onsetDescription, onsetDescription); } private String ageNotAvailable(PhenopacketSex psex) { diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktPhenotypicfeatureDutch.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktPhenotypicfeatureDutch.java index a574216..3fe0385 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktPhenotypicfeatureDutch.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktPhenotypicfeatureDutch.java @@ -50,9 +50,9 @@ private String getOxfordCommaList(List items) { if (jj > 0) { String end = symList.substring(jj+2); if 
(vowels.contains(end.charAt(0))) { - symList = symList.substring(0, jj) + " i " + end; + symList = symList.substring(0, jj) + " en " + end; } else { - symList = symList.substring(0, jj) + " y " + end; + symList = symList.substring(0, jj) + " en " + end; } } return symList; @@ -80,9 +80,9 @@ public String formatFeatures(List ontologyTerms) { } else { String exclusion; if (excludedLabels.size() == 1) { - exclusion = String.format("en %s werd uitgesloten.", getOxfordCommaList(excludedLabels)); + exclusion = String.format(". %s werd uitgesloten.", getOxfordCommaList(excludedLabels)); } else { - exclusion = String.format("en %s zijn uitgesloten.", getOxfordCommaList(excludedLabels)); + exclusion = String.format(". %s zijn uitgesloten.", getOxfordCommaList(excludedLabels)); } return getOxfordCommaList(observedLabels) + exclusion; } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktTextDutch.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktTextDutch.java index 34c88a7..405c033 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktTextDutch.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktTextDutch.java @@ -7,9 +7,9 @@ public class PpktTextDutch implements PhenopacketTextGenerator { @Override public String QUERY_HEADER() { return """ -Ik doe een experiment met een klinisch verslag om te zien hoe jullie diagnoses zich verhouden tot die van menselijke experts. Ik geef je een deel van een medisch geval. Je probeert geen patiënten te behandelen. In dit geval ben je "Dr. GPT-4", een AI-taalmodel dat een diagnose stelt. Hier zijn enkele richtlijnen. Ten eerste is er één definitieve diagnose en dat is een diagnose waarvan bekend is dat die bij mensen bestaat. 
De diagnose wordt bijna altijd bevestigd door een of andere genetische test, maar in zeldzame gevallen waarin zo'n test niet bestaat voor een diagnose, kan de diagnose in plaats daarvan worden gesteld op basis van gevalideerde klinische criteria of heel zelden gewoon worden bevestigd door de mening van een expert. Nadat je de casus hebt gelezen, wil ik dat je een differentiaaldiagnose stelt met een lijst van mogelijke diagnoses gerangschikt naar waarschijnlijkheid, te beginnen met de meest waarschijnlijke kandidaat. Elke kandidaat moet gespecificeerd worden met de OMIM identifier en de naam van de ziekte. Bijvoorbeeld, als de eerste kandidaat het Branchiooculofaciaal syndroom is en de tweede Cystic fibrosis, geef dan dit: +Ik doe een experiment met een klinisch verslag om te zien hoe jullie diagnoses zich verhouden tot die van menselijke experts. Ik geef je een deel van een medisch geval. Je probeert geen patiënten te behandelen. In dit geval ben je "Dr. GPT-4", een AI-taalmodel dat een diagnose stelt. Hier zijn enkele richtlijnen. Ten eerste is er één definitieve diagnose en dat is een diagnose waarvan bekend is dat die bij mensen bestaat. De diagnose wordt bijna altijd bevestigd door één of andere genetische test, maar in zeldzame gevallen waarin zo'n test niet bestaat voor een diagnose, kan de diagnose in plaats daarvan worden gesteld op basis van gevalideerde klinische criteria of heel zelden gewoon worden bevestigd door de mening van een expert. Nadat je de casus hebt gelezen, wil ik dat je een differentiaaldiagnose stelt met een lijst van mogelijke diagnoses gerangschikt naar waarschijnlijkheid, te beginnen met de meest waarschijnlijke kandidaat. Elke kandidaat moet gespecificeerd worden met de OMIM identifier en de naam van de ziekte. Bijvoorbeeld, als de eerste kandidaat het Branchiooculofaciaal syndroom is en de tweede Cystic fibrosis, geef dan dit: -1. OMIM:113620 - Branchiooculofaciaal syndroom +1. OMIM:113620 - Branchio-oculo-faciaal syndroom 2. 
OMIM:219700 - Taaislijmziekte Deze lijst moet zoveel diagnoses bevatten als je redelijk acht. From a34bd0ebed8ef3af2a40c01a6c5c008724a1bc4a Mon Sep 17 00:00:00 2001 From: Kyran Wissink Date: Tue, 7 May 2024 14:25:13 +0200 Subject: [PATCH 06/39] Translated to Dutch with correct syntax --- .../output/impl/dutch/DutchPromptGenerator.java | 6 ------ .../output/impl/spanish/PpktIndividualSpanish.java | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java index 239495e..110c5a5 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java @@ -56,10 +56,4 @@ public String getVignetteAtAge(PhenopacketAge page, PhenopacketSex psex, List 0) { return String.format("una niña de %d años", y); - } else if (m>0) { + } else if (m > 0) { return String.format("una bebe niña de %d meses", m); } else { return String.format("una recien nacida %d meses", d); From ba6b4a36469e64559fd3be86af25b21f047aef00 Mon Sep 17 00:00:00 2001 From: "Leonardo local Kubuntu 22.04" Date: Tue, 7 May 2024 19:43:36 +0200 Subject: [PATCH 07/39] Got rid of request for OMIM IDs in English prompt, as discussed. 
--- .../output/impl/english/PpktTextEnglish.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktTextEnglish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktTextEnglish.java index 5bf3c37..647424c 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktTextEnglish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktTextEnglish.java @@ -6,14 +6,14 @@ public class PpktTextEnglish implements PhenopacketTextGenerator { @Override public String QUERY_HEADER() { return """ -I am running an experiment on a clinical case report to see how your diagnoses compare with those of human experts. I am going to give you part of a medical case. You are not trying to treat any patients. In this case, you are “Dr. GPT-4,” an AI language model who is providing a diagnosis Here are some guidelines. First, there is a single definitive diagnosis, and it is a diagnosis that is known today to exist in humans. The diagnosis is almost always confirmed by some sort of genetic test, though in rare cases when such a test does not exist for a diagnosis the diagnosis can instead be made using validated clinical criteria or very rarely just confirmed by expert opinion. After you read the case, I want you to give a differential diagnosis with a list of candidate diagnoses ranked by probability starting with the most likely candidate. Each candidate should be specified with the OMIM identifier and disease name. For instance, if the first candidate is Branchiooculofacial syndrome and the second is Cystic fibrosis, provide this: +I am running an experiment on a clinical case report to see how your diagnoses compare with those of human experts. I am going to give you part of a medical case. You are not trying to treat any patients. In this case, you are “Dr. 
GPT-4”, an AI language model who is providing a diagnosis. Here are some guidelines. First, there is a single definitive diagnosis, and it is a diagnosis that is known today to exist in humans. The diagnosis is almost always confirmed by some sort of genetic test, though in rare cases when such a test does not exist for a diagnosis the diagnosis can instead be made using validated clinical criteria or very rarely just confirmed by expert opinion. After you read the case, I want you to give a differential diagnosis with a list of candidate diagnoses ranked by probability starting with the most likely candidate. Each candidate should be specified with disease name. For instance, if the first candidate is Branchiooculofacial syndrome and the second is Cystic fibrosis, provide this: -1. OMIM:113620 - Branchiooculofacial syndrome -2. OMIM:219700 - Cystic fibrosis +1. Branchiooculofacial syndrome +2. Cystic fibrosis This list should provide as many diagnoses as you think are reasonable. -You do not need to explain your reasoning, just list the diagnoses together with the OMIM identifiers. +You do not need to explain your reasoning, just list the diagnoses. Here is the case: """; From b23f3b5e76a55fce0e068e1a91287e9d24d227c0 Mon Sep 17 00:00:00 2001 From: "Leonardo local Kubuntu 22.04" Date: Tue, 7 May 2024 23:13:52 +0200 Subject: [PATCH 08/39] Got rid of request for OMIM IDs in Spanish prompt, as discussed. 
--- .../output/impl/spanish/PpktTextSpanish.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktTextSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktTextSpanish.java index c31542b..bb807ef 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktTextSpanish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktTextSpanish.java @@ -7,14 +7,14 @@ public class PpktTextSpanish implements PhenopacketTextGenerator { @Override public String QUERY_HEADER() { return """ -Estoy realizando un experimento con el informe de un caso clínico para comparar sus diagnósticos con los de expertos humanos. Les voy a dar parte de un caso médico. No estás intentando tratar a ningún paciente. En este caso, usted es el “Dr. GPT-4”, un modelo de lenguaje de IA que proporciona un diagnóstico. Aquí hay algunas pautas. En primer lugar, existe un único diagnóstico definitivo, y es un diagnóstico que hoy se sabe que existe en humanos. El diagnóstico casi siempre se confirma mediante algún tipo de prueba genética, aunque en casos raros cuando no existe dicha prueba para un diagnóstico, el diagnóstico puede realizarse utilizando criterios clínicos validados o, muy raramente, simplemente confirmado por la opinión de un experto. Después de leer el caso, quiero que haga un diagnóstico diferencial con una lista de diagnósticos candidatos clasificados por probabilidad comenzando con el candidato más probable. Cada candidato debe especificarse con el identificador OMIM y el nombre de la enfermedad. Por ejemplo, si el primer candidato es el síndrome branquiooculofacial y el segundo es la fibrosis quística, proporcione lo siguiente: +Estoy realizando un experimento con el informe de un caso clínico para comparar sus diagnósticos con los de expertos humanos. 
Les voy a dar parte de un caso médico. No estás intentando tratar a ningún paciente. En este caso, usted es el “Dr. GPT-4”, un modelo de lenguaje de IA que proporciona un diagnóstico. Aquí hay algunas pautas. En primer lugar, existe un único diagnóstico definitivo, y es un diagnóstico que hoy se sabe que existe en humanos. El diagnóstico casi siempre se confirma mediante algún tipo de prueba genética, aunque en casos raros cuando no existe dicha prueba para un diagnóstico, el diagnóstico puede realizarse utilizando criterios clínicos validados o, muy raramente, simplemente confirmado por la opinión de un experto. Después de leer el caso, quiero que haga un diagnóstico diferencial con una lista de diagnósticos candidatos clasificados por probabilidad comenzando con el candidato más probable. Cada candidato debe especificarse con el nombre de la enfermedad. Por ejemplo, si el primer candidato es el síndrome branquiooculofacial y el segundo es la fibrosis quística, proporcione lo siguiente: -1. OMIM:113620 - Síndrome branquiooculofacial -2. OMIM:219700 - Fibrosis quística +1. Síndrome branquiooculofacial +2. Fibrosis quística Esta lista debe proporcionar tantos diagnósticos como considere razonables. -No es necesario que explique su razonamiento, simplemente enumere los diagnósticos junto con los identificadores OMIM. +No es necesario que explique su razonamiento, simplemente enumere los diagnósticos. 
Este es el caso: """; From de49049f95bc2969ee53e72aad84e384a43e4dd3 Mon Sep 17 00:00:00 2001 From: "Leonardo local Kubuntu 22.04" Date: Wed, 8 May 2024 23:12:41 +0200 Subject: [PATCH 09/39] Added spanish request for english reply to prompt --- .../phenopacket2prompt/output/impl/spanish/PpktTextSpanish.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktTextSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktTextSpanish.java index bb807ef..2292b1e 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktTextSpanish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktTextSpanish.java @@ -15,6 +15,7 @@ public String QUERY_HEADER() { Esta lista debe proporcionar tantos diagnósticos como considere razonables. No es necesario que explique su razonamiento, simplemente enumere los diagnósticos. +Te estoy dando estas instrucciones en Español pero quiero que proveas todas tus respuestas en Inglés. 
Este es el caso: """; From 128041fbafaf8b7566e4fc950f7d2967dbf515f5 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Fri, 10 May 2024 13:51:18 +0200 Subject: [PATCH 10/39] fixing a few spanish phrases --- .../impl/spanish/PpktIndividualSpanish.java | 70 ++++++++++--------- 1 file changed, 37 insertions(+), 33 deletions(-) diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java index 5e5060b..2689760 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java @@ -11,6 +11,10 @@ public class PpktIndividualSpanish implements PhenopacketIndividualInformationGenerator { + private static final String FEMALE_INFANT = "un bebé femenino"; + + + /** * Equivalent of "The clinical * @param individual @@ -83,7 +87,7 @@ public String ageAndSexAtLastExamination(PpktIndividual individual) { }; } else if (age.isInfant()) { return switch (psex) { - case FEMALE -> "un bebé femenino"; + case FEMALE -> FEMALE_INFANT; case MALE -> "un bebé masculino"; default -> "un bebé"; }; @@ -183,9 +187,9 @@ private String iso8601AtAgeOf(Iso8601Age isoAge) { components.add("1 dia"); } if (components.isEmpty()) { - return "as a newborn"; + return "en el período neonatal"; } else if (components.size() == 1) { - return "at the age of " + components.get(0); + return "a la edad de " + components.get(0); } else if (components.size() == 2) { return "a la edad de " + components.get(0) + " y " + components.get(1); } else { @@ -196,9 +200,9 @@ private String iso8601AtAgeOf(Iso8601Age isoAge) { private String onsetTermAtAgeOf(HpoOnsetAge hpoOnsetTermAge) { if (hpoOnsetTermAge.isFetus()) { - return "en el periodo fetal"; + return "en el período fetal"; } else if 
(hpoOnsetTermAge.isCongenital()) { - return "en el periodo neonatal"; + return "en el período neonatal"; } else if (hpoOnsetTermAge.isInfant()) { return "como un bebe"; } else if (hpoOnsetTermAge.isChild()) { @@ -252,39 +256,39 @@ private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8 private String hpoOnsetIndividualDescription(PhenopacketSex psex, HpoOnsetAge hpoOnsetTermAge) { if (hpoOnsetTermAge.isFetus()) { return switch (psex) { - case FEMALE -> "female fetus"; - case MALE -> "male fetus"; - default -> "fetus"; + case FEMALE -> FEMALE_INFANT; + case MALE -> "feto masculino"; + default -> "feto"; }; } else if (hpoOnsetTermAge.isCongenital()) { return switch (psex) { - case FEMALE -> "female newborn"; - case MALE -> "male newborn"; - default -> "newborn"; + case FEMALE -> "una niña recién nacida"; + case MALE -> "un niño recién nacido"; + default -> "un bebe recién nacido"; }; } else if (hpoOnsetTermAge.isInfant()) { return switch (psex) { - case FEMALE -> "female infant"; - case MALE -> "male infant"; - default -> "infant"; + case FEMALE -> FEMALE_INFANT; + case MALE -> "un bebé masculino"; + default -> "un bebé"; }; } else if (hpoOnsetTermAge.isChild()) { return switch (psex) { - case FEMALE -> "girl"; - case MALE -> "boy"; - default -> "child"; + case FEMALE -> "niña"; + case MALE -> "niño"; + default -> "niño"; }; } else if (hpoOnsetTermAge.isJuvenile()) { return switch (psex) { - case FEMALE -> "female adolescent"; - case MALE -> "male adolescent"; - default -> "adolescent"; + case FEMALE -> "una adolescente femenina"; + case MALE -> "un adolescente masculino"; + default -> "un adolescente"; }; }else { return switch (psex) { - case FEMALE -> "woman"; - case MALE -> "man"; - default -> "adult"; + case FEMALE -> "una mujer"; + case MALE -> "un hombre"; + default -> "un adulto"; }; } } @@ -365,7 +369,7 @@ private String onsetAvailable(PhenopacketSex psex, PhenopacketAge onsetAge) { // should never happen throw new 
PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); } - return String.format("The proband presented %s with", onsetDescription, onsetDescription); + return String.format("El paciente se presentó con %s", onsetDescription, onsetDescription); } private String ageNotAvailable(PhenopacketSex psex) { @@ -413,9 +417,9 @@ public String ppktSex(PpktIndividual individual) { } if (ageOpt.isEmpty()) { return switch (psex) { - case FEMALE -> "female"; - case MALE -> "male"; - default -> "individual"; + case FEMALE -> "mujer"; + case MALE -> "hombre"; + default -> "individuo"; }; } PhenopacketAge age = ageOpt.get();; @@ -439,15 +443,15 @@ public String ppktSex(PpktIndividual individual) { }; } else if (age.isInfant()) { return switch (psex) { - case FEMALE -> "female infant"; - case MALE -> "male infant"; - default -> "infant"; + case FEMALE -> FEMALE_INFANT; + case MALE -> "un infante masculino"; + default -> "un infante"; }; } else { return switch (psex) { - case FEMALE -> "woman"; - case MALE -> "man"; - default -> "individual"; + case FEMALE -> "mujer"; + case MALE -> "hombre"; + default -> "adulto"; }; } } From 3134826fae5dc29e2cdf9907698727053699110c Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Sat, 11 May 2024 13:05:33 +0200 Subject: [PATCH 11/39] german translation --- .../cmd/GbtTranslateBatchCommand.java | 6 +- .../cmd/GptTranslateCommand.java | 4 + .../HpInternationalOboParser.java | 2 +- .../output/PromptGenerator.java | 7 + .../impl/german/GermanPromptGenerator.java | 63 +++ .../impl/german/PpktIndividualGerman.java | 473 ++++++++++++++++++ .../german/PpktPhenotypicfeatureGerman.java | 82 +++ .../output/impl/german/PpktTextGerman.java | 23 + .../impl/spanish/PpktIndividualSpanish.java | 4 +- 9 files changed, 659 insertions(+), 5 deletions(-) create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java create mode 100644 
src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktPhenotypicfeatureGerman.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktTextGerman.java diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java index c70d93d..a23aaca 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java @@ -65,6 +65,8 @@ public Integer call() throws Exception { // output all non-English languages here PromptGenerator spanish = PromptGenerator.spanish(hpo, internationalMap.get("es")); outputPromptsInternational(ppktFiles, hpo, "es", spanish); + PromptGenerator german = PromptGenerator.german(hpo, internationalMap.get("de")); + outputPromptsInternational(ppktFiles, hpo, "de", german); // output file with correct diagnosis list outputCorrectResults(correctResultList); return 0; @@ -97,7 +99,7 @@ private void outputPromptsInternational(List ppktFiles, Ontology hpo, Stri PpktIndividual individual = new PpktIndividual(f); List diseaseList = individual.getDiseases(); if (diseaseList.size() != 1) { - System.err.println(String.format("[ERROR] Got %d diseases for %s.\n", diseaseList.size(), individual.getPhenopacketId())); + System.err.printf("[ERROR] Got %d diseases for %s.\n", diseaseList.size(), individual.getPhenopacketId()); continue; } PhenopacketDisease pdisease = diseaseList.get(0); @@ -123,7 +125,7 @@ private List outputPromptsEnglish(List ppktFiles, Ontology PpktIndividual individual = new PpktIndividual(f); List diseaseList = individual.getDiseases(); if (diseaseList.size() != 1) { - System.err.println(String.format("[ERROR] 
Got %d diseases for %s.\n", diseaseList.size(), individual.getPhenopacketId())); + System.err.printf("[ERROR] Got %d diseases for %s.\n", diseaseList.size(), individual.getPhenopacketId()); continue; } PhenopacketDisease pdisease = diseaseList.get(0); diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java index 88e311e..e1e4898 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java @@ -62,6 +62,10 @@ public Integer call() throws Exception { PromptGenerator spanish = PromptGenerator.spanish(hpo, internationalMap.get("es")); prompt = spanish.createPrompt(individual); System.out.println(prompt); + System.out.println("GERMAN"); + PromptGenerator german = PromptGenerator.spanish(hpo, internationalMap.get("de")); + prompt = german.createPrompt(individual); + System.out.println(prompt); return 0; } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java index 52a4824..4931986 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java @@ -53,7 +53,7 @@ public Optional getTranslation(String annots) { public HpInternationalOboParser(File file) { languageToInternationalMap = new HashMap<>(); String pattern = "id: (HP:\\d{7,7})"; - Set acronyms = Set.of("cs", "en", "es", "fr", "ja", "nl", "nna", "tr", "tw", "zh"); + Set acronyms = Set.of("cs", "en", "de", "it", "es", "fr", "ja", "nl", "nna", "tr", "tw", "zh"); for (String acronym : acronyms) { languageToInternationalMap.put(acronym, new HpInternational(acronym)); 
} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java index 6603b38..95597de 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java @@ -7,6 +7,8 @@ import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; import org.monarchinitiative.phenopacket2prompt.output.impl.english.EnglishPromptGenerator; +import org.monarchinitiative.phenopacket2prompt.output.impl.german.GermanPromptGenerator; +import org.monarchinitiative.phenopacket2prompt.output.impl.german.PpktPhenotypicfeatureGerman; import org.monarchinitiative.phenopacket2prompt.output.impl.spanish.*; import java.util.List; @@ -33,6 +35,11 @@ static PromptGenerator spanish(Ontology hpo, HpInternational international) { return new SpanishPromptGenerator(hpo, pfgen); } + static PromptGenerator german(Ontology hpo, HpInternational international) { + PpktPhenotypicFeatureGenerator pfgen = new PpktPhenotypicfeatureGerman(international); + return new GermanPromptGenerator(hpo, pfgen); + } + /** * The following structure should work for most other languages, but the function * can be overridden if necessary. 
diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java new file mode 100644 index 0000000..509ef4f --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java @@ -0,0 +1,63 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.german; + +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketIndividualInformationGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketTextGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator; + +import java.util.List; + +public class GermanPromptGenerator implements PromptGenerator { + + private final Ontology hpo; + + + private final PhenopacketIndividualInformationGenerator ppktAgeSexGenerator; + + private final PhenopacketTextGenerator ppktTextGenerator; + + private final PpktPhenotypicFeatureGenerator ppktPhenotypicFeatureGenerator; + + + + public GermanPromptGenerator(Ontology hpo, PpktPhenotypicFeatureGenerator pfgen) { + this.hpo = hpo; + ppktAgeSexGenerator = new PpktIndividualGerman(); + ppktTextGenerator = new PpktTextGerman(); + this.ppktPhenotypicFeatureGenerator = pfgen; + } + + @Override + public String queryHeader() { + return ppktTextGenerator.QUERY_HEADER(); + } + + @Override + public String getIndividualInformation(PpktIndividual ppktIndividual) { + return 
this.ppktAgeSexGenerator.getIndividualDescription(ppktIndividual); + } + + @Override + public String formatFeatures(List ontologyTerms) { + return ppktPhenotypicFeatureGenerator.formatFeatures(ontologyTerms); + } + + @Override + public String getVignetteAtAge(PhenopacketAge page, PhenopacketSex psex, List terms) { + String ageString = this.ppktAgeSexGenerator.atAge(page); + String features = formatFeatures(terms); + return String.format("%s, %s presentó %s", ageString, ppktAgeSexGenerator.heSheIndividual(psex), features); + } + + + + + + + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java new file mode 100644 index 0000000..22333ee --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java @@ -0,0 +1,473 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.german; + +import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.monarchinitiative.phenopacket2prompt.model.*; +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketIndividualInformationGenerator; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +public class PpktIndividualGerman implements PhenopacketIndividualInformationGenerator { + + + private static final String FEMALE_INFANT = "ein weiblicher Säugling"; + private static final String MALE_INFANT = "ein männlicher Säugling"; + private static final String INFANT = "ein Säugling"; + + private static final String FEMALE_FETUS = "ein weiblicher Fet"; + private static final String MALE_FETUS = "ein männlicher Fet"; + private static final String FETUS = "ein Fet"; + + private static final String FEMALE_CHILD = "Mädchen"; + private static final String MALE_CHILD = "Junge"; + private static final String CHILD = "Kind"; + + private static final String 
FEMALE_ADULT = "Frau"; + private static final String MALE_ADULT = "Mann"; + private static final String ADULT = "erwachsene Person unbekannten Geschlechtes"; + /** + * Equivalent of "The clinical + * @param individual + * @return + */ + public String ageAndSexAtOnset(PpktIndividual individual) { + Optional ageOpt = individual.getAgeAtOnset(); + return ""; + } + + + + + public String ageAndSexAtLastExamination(PpktIndividual individual) { + PhenopacketSex psex = individual.getSex(); + Optional ageOpt = individual.getAgeAtLastExamination(); + if (ageOpt.isEmpty()) { + ageOpt = individual.getAgeAtOnset(); + } + String sex; + switch (psex) { + case FEMALE -> sex = FEMALE_ADULT; + case MALE -> sex = MALE_ADULT; + default -> sex = ADULT; + } + + if (ageOpt.isEmpty()) { + return sex; + } + PhenopacketAge age = ageOpt.get(); + if (age.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoage = (Iso8601Age) age; + int y = isoage.getYears(); + int m = isoage.getMonths(); + int d = isoage.getDays(); + if (psex.equals(PhenopacketSex.FEMALE)) { + if (y > 17) { + return String.format("Eine %djährige Patientin", y); + } else if (y > 9) { + return String.format("Eine %djährige Jugendliche", y); + } else if (y > 0) { + return String.format("Ein %djähriges Mädchen", y); + } else if (m>0) { + return String.format("Ein %d Monate alter weiblicher Säugling", m); + } else { + return String.format("Ein %d Tage alter weiblicher Säugling", d); + } + } + } else { + // age is an HPO onset term, we do not have an exact date + } + if (age.isChild()) { + return switch (psex) { + case FEMALE -> FEMALE_CHILD; + case MALE -> MALE_CHILD; + default -> CHILD; + }; + } else if (age.isCongenital()) { + return switch (psex) { + case FEMALE -> "ein weibliches Neugeborenes"; + case MALE -> "ein männliches Neugeborenes"; + default -> "ein Neugeborenes"; + }; + } else if (age.isFetus()) { + return switch (psex) { + case FEMALE -> "ein weiblicher Fet"; + case MALE -> "ein männlicher Fet"; + 
default -> "ein Fet"; + }; + } else if (age.isInfant()) { + return switch (psex) { + case FEMALE -> FEMALE_INFANT; + case MALE -> MALE_INFANT; + default -> INFANT; + }; + } else { + return switch (psex) { + case FEMALE -> FEMALE_ADULT; + case MALE -> MALE_ADULT; + default -> ADULT; + }; + } + } + + + private String atIsoAgeExact(PhenopacketAge ppktAge) { + Iso8601Age iso8601Age = (Iso8601Age) ppktAge; + int y = iso8601Age.getYears(); + int m = iso8601Age.getMonths(); + int d = iso8601Age.getDays(); + + if (y > 10) { + return String.format("%d Jahre", y); + } else if (y > 0) { + if (m > 1) { + return String.format("%d Jahre und %d Monate", y, m); + } else if (m == 1) { + return String.format("%d Jahre und ein Monat", y); + } else { + return String.format("%d Jahre", y); + } + } else if (m>0) { + return String.format("%d Monate y %d Tage", m, d); + } else { + return String.format("%d Tage", d); + } + } + + + @Override + public String getIndividualDescription(PpktIndividual individual) { + Optional lastExamOpt = individual.getAgeAtLastExamination(); + Optional onsetOpt = individual.getAgeAtOnset(); + PhenopacketSex psex = individual.getSex(); + if (lastExamOpt.isPresent() && onsetOpt.isPresent()) { + return onsetAndLastEncounterAvailable(psex, lastExamOpt.get(), onsetOpt.get()); + } else if (lastExamOpt.isPresent()) { + return lastEncounterAvailable(psex, lastExamOpt.get()); + } else if (onsetOpt.isPresent()) { + return onsetAvailable(psex, onsetOpt.get()); + } else { + return ageNotAvailable(psex); + } + } + + + private String iso8601ToYearMonth(Iso8601Age iso8601Age) { + if (iso8601Age.getMonths() == 0) { + return String.format("de %d años", iso8601Age.getYears()); + } else { + return String.format("de %d años y %d meses", iso8601Age.getYears(), iso8601Age.getMonths()); + } + } + + private String iso8601ToMonthDay(Iso8601Age iso8601Age) { + int m = iso8601Age.getMonths(); + int d = iso8601Age.getDays(); + if (m == 0) { + return String.format("de %d dias", d); + } 
else if (d>0){ + return String.format("de %d meses y %d dias", m, d); + } else { + return String.format("de %d meses", m); + } + } + + /** + * Create a phrase such as "at the age of 7 years, 4 months, and 2 days" + * Leave out the months and days if they are zero. + * @param isoAge + * @return + */ + private String iso8601AtAgeOf(Iso8601Age isoAge) { + List components = new ArrayList<>(); + + if (isoAge.getYears()>1) { + components.add(String.format("%d Jahren", isoAge.getYears())); + } else if (isoAge.getYears() == 1) { + components.add("einem Jahr"); + } + if (isoAge.getMonths() > 1) { + components.add(String.format("%d Monaten", isoAge.getMonths())); + } else if (isoAge.getMonths() == 1) { + components.add("einem Monat"); + } + if (isoAge.getDays()>1) { + components.add(String.format("%d Tagen", isoAge.getDays())); + } else if (isoAge.getDays()==1) { + components.add("einem Tag"); + } + if (components.isEmpty()) { + return "im Neugeborenen Alter"; + } else if (components.size() == 1) { + return "im Alter von " + components.get(0); + } else if (components.size() == 2) { + return "im Alter von " + components.get(0) + " und " + components.get(1); + } else { + return "im Alter von " + components.get(0) + ", " + components.get(1) + + " und " + components.get(2); + } + } + + private String onsetTermAtAgeOf(HpoOnsetAge hpoOnsetTermAge) { + if (hpoOnsetTermAge.isFetus()) { + return "in der Fetalperiode"; + } else if (hpoOnsetTermAge.isCongenital()) { + return "im Neugeborenenalter"; + } else if (hpoOnsetTermAge.isInfant()) { + return "im Säuglingsalter"; + } else if (hpoOnsetTermAge.isChild()) { + return "in der Kindheit"; + } else if (hpoOnsetTermAge.isJuvenile()) { + return "como adolescente"; + } else { + return "im Erwachsenenalter"; + } + } + + + private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8601Age) { + int y = iso8601Age.getYears(); + int m = iso8601Age.getMonths(); + int d = iso8601Age.getDays(); + // if older + if (y>17) { + 
return switch (psex) { + case FEMALE -> String.format("eine %djährige Frau", y); + case MALE -> String.format("ein %djähriger Mann", y); + default -> String.format("eine %djährige Person", y); + }; + } else if (y>9) { + return switch (psex) { + case FEMALE -> String.format("una adolescente de %d años", y); + case MALE -> String.format("un adolescente de %d años", y); + default -> String.format("un adolescente de %d años", y); + }; + } else if (y>0) { + return switch (psex) { + case FEMALE -> String.format("niña %s", iso8601ToYearMonth(iso8601Age)); + case MALE -> String.format("niño %s", iso8601ToYearMonth(iso8601Age)); + default -> String.format("niño %s", iso8601ToYearMonth(iso8601Age)); + }; + } else if (m>0 || d> 0) { + return switch (psex) { + case FEMALE -> String.format("una infante %s", iso8601ToMonthDay(iso8601Age)); + case MALE -> String.format("un infante %s", iso8601ToMonthDay(iso8601Age)); + default -> String.format("un infante %s", iso8601ToMonthDay(iso8601Age)); + }; + } else { + return switch (psex) { + case FEMALE -> "recien nacida"; + case MALE -> "recien nacido"; + default -> "recien nacido"; + }; + } + } + + private String hpoOnsetIndividualDescription(PhenopacketSex psex, HpoOnsetAge hpoOnsetTermAge) { + if (hpoOnsetTermAge.isFetus()) { + return switch (psex) { + case FEMALE -> FEMALE_FETUS; + case MALE -> MALE_FETUS; + default -> FETUS; + }; + } else if (hpoOnsetTermAge.isCongenital()) { + return switch (psex) { + case FEMALE -> "una niña recién nacida"; + case MALE -> "un niño recién nacido"; + default -> "un bebe recién nacido"; + }; + } else if (hpoOnsetTermAge.isInfant()) { + return switch (psex) { + case FEMALE -> FEMALE_INFANT; + case MALE -> MALE_INFANT; + default -> INFANT; + }; + } else if (hpoOnsetTermAge.isChild()) { + return switch (psex) { + case FEMALE -> "niña"; + case MALE -> "niño"; + default -> "niño"; + }; + } else if (hpoOnsetTermAge.isJuvenile()) { + return switch (psex) { + case FEMALE -> "una adolescente femenina"; + 
case MALE -> "un adolescente masculino"; + default -> "un adolescente"; + }; + }else { + return switch (psex) { + case FEMALE -> "eine Frau"; + case MALE -> "ein Mann"; + default -> "eine Person"; + }; + } + } + + /** + * A sentence such as The proband was a 39-year old woman who presented at the age of 12 years with + * HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. This method returns the phrase that ends with "with" + * El sujeto era un niño de 1 año y 10 meses que se presentó como recién nacido con un filtrum largo. + * @param psex + * @param lastExamAge + * @param onsetAge + * @return + */ + private String onsetAndLastEncounterAvailable(PhenopacketSex psex, PhenopacketAge lastExamAge, PhenopacketAge onsetAge) { + String individualDescription; + String onsetDescription; + if (lastExamAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoAge = (Iso8601Age) lastExamAge; + individualDescription = iso8601individualDescription(psex, isoAge); + } else if (lastExamAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) lastExamAge; + individualDescription = hpoOnsetIndividualDescription(psex,hpoOnsetTermAge); + } else { + // should never happen + throw new PhenolRuntimeException("Did not recognize last exam age type " + lastExamAge.ageType()); + } + if (onsetAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoAge = (Iso8601Age) onsetAge; + onsetDescription = iso8601AtAgeOf(isoAge); + } else if (onsetAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) onsetAge; + onsetDescription = onsetTermAtAgeOf(hpoOnsetTermAge); + } else { + // should never happen + throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); + } + return switch (psex) { + case FEMALE -> String.format("Die Probandin war %s, der sich %s mit den folgenden Symptomen vorgestellt hat: ", individualDescription, 
onsetDescription); + default -> String.format("Der Proband war %s, der sich %s mit den folgenden Symptomen vorgestellt hat: ", individualDescription, onsetDescription); + }; + } + + + /** + * Age at last examination available but age of onset not available + * The proband was a 39-year old woman who presented with HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. + * @param psex + * @param lastExamAge + */ + private String lastEncounterAvailable(PhenopacketSex psex, PhenopacketAge lastExamAge) { + String individualDescription; + if (lastExamAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoAge = (Iso8601Age) lastExamAge; + individualDescription = iso8601individualDescription(psex, isoAge); + } else if (lastExamAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) lastExamAge; + individualDescription = hpoOnsetIndividualDescription(psex,hpoOnsetTermAge); + } else { + // should never happen + throw new PhenolRuntimeException("Did not recognize last exam age type " + lastExamAge.ageType()); + } + return String.format("El paciente era %s quien se presentó con", individualDescription); + } + + /** + * Age at last examination not available but age of onset available + * The proband presented at the age of 12 years with HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. 
+ * @param psex + * @param onsetAge + * @return + */ + private String onsetAvailable(PhenopacketSex psex, PhenopacketAge onsetAge) { + String onsetDescription; + if (onsetAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoAge = (Iso8601Age) onsetAge; + onsetDescription = iso8601AtAgeOf(isoAge); + } else if (onsetAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) onsetAge; + onsetDescription = onsetTermAtAgeOf(hpoOnsetTermAge); + } else { + // should never happen + throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); + } + return String.format("Der Patient stellte sich %s mit den folgenden Symptomen vor: ", onsetDescription); + } + + private String ageNotAvailable(PhenopacketSex psex) { + return switch (psex) { + case FEMALE -> "Die Patientin stellte sich mit den folgenden Symptomen vor: "; + case MALE -> "Der Patient stellte sich mit den folgenden Symptomen vor: "; + default -> "Der Patient stellte sich mit den folgenden Symptomen vor: "; + }; + } + + @Override + public String heSheIndividual(PhenopacketSex psex) { + return switch (psex) { + case FEMALE -> "er"; + case MALE -> "sie"; + default -> "die Person"; + }; + } + + @Override + public String atAge(PhenopacketAge ppktAge) { + if (ppktAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + return "im Alter von " + atIsoAgeExact(ppktAge); + } else if (ppktAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + String label = ppktAge.age(); // something like "Infantile onset" + return switch (label) { + case "Infantile onset" -> "als Säugling"; + case "Childhood onset" -> "in der Kindheit"; + case "Neonatal onset" -> "in der neugeborenen Zeit"; + case "Congenital onset" -> "zum Zeitpunkt der Geburt"; + case "Adult onset" -> "im Erwachsenenalter"; + default-> String.format("TODO TODO el %s período", label.replace(" onset", "")); + }; + } else { + return ""; // should never get 
here + } + } + + // @Override + public String ppktSex(PpktIndividual individual) { + PhenopacketSex psex = individual.getSex(); + Optional ageOpt = individual.getAgeAtLastExamination(); + if (ageOpt.isEmpty()) { + ageOpt = individual.getAgeAtOnset(); + } + if (ageOpt.isEmpty()) { + return switch (psex) { + case FEMALE -> "Frau"; + case MALE -> "Mann"; + default -> "Person"; + }; + } + PhenopacketAge age = ageOpt.get(); + if (age.isChild()) { + return switch (psex) { + case FEMALE -> "Mädchen"; + case MALE -> "Junge"; + default -> "Kind"; + }; + } else if (age.isCongenital()) { + return switch (psex) { + case FEMALE -> "weibliches Neugeborenes"; + case MALE -> "männliches Neugeborenes"; + default -> "Neugeborenes"; + }; + } else if (age.isFetus()) { + return switch (psex) { + case FEMALE -> FEMALE_FETUS; + case MALE -> MALE_FETUS; + default -> FETUS; + }; + } else if (age.isInfant()) { + return switch (psex) { + case FEMALE -> FEMALE_INFANT; + case MALE -> MALE_INFANT; + default -> INFANT; + }; + } else { + return switch (psex) { + case FEMALE -> "Frau"; + case MALE -> "Mann"; + default -> "Person"; + }; + } + } + + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktPhenotypicfeatureGerman.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktPhenotypicfeatureGerman.java new file mode 100644 index 0000000..9923d2c --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktPhenotypicfeatureGerman.java @@ -0,0 +1,82 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.german; + +import org.monarchinitiative.phenopacket2prompt.international.HpInternational; +import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; +import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.function.Predicate; + +public class 
PpktPhenotypicfeatureGerman implements PpktPhenotypicFeatureGenerator { + + private final HpInternational spanish; + + public PpktPhenotypicfeatureGerman(HpInternational international) { + spanish = international; + } + + + + private List getTranslations(List ontologyTerms) { + List labels = new ArrayList<>(); + for (var term: ontologyTerms) { + Optional opt = spanish.getLabel(term.getTid()); + if (opt.isPresent()) { + labels.add(opt.get()); + } else { + System.err.printf("[ERROR] Could not find %s translation for %s (%s).\n", spanish.getLanguageAcronym(), term.getLabel(), term.getTid().getValue()); + } + } + return labels; + } + + + + private String getOxfordCommaList(List items) { + if (items.size() == 1) { + return items.get(0); + } + if (items.size() == 2) { + // no comma if we just have two items. + // one item will work with the below code + return String.join(" und ", items); + } + String symList = String.join(", ", items); + int jj = symList.lastIndexOf(", "); + String end = symList.substring(jj+2); + symList = symList.substring(0, jj) + " und " + end; + return symList; + } + + @Override + public String formatFeatures(List ontologyTerms) { + List observedTerms = ontologyTerms.stream() + .filter(Predicate.not(OntologyTerm::isExcluded)) + .toList(); + List observedLabels = getTranslations(observedTerms); + List excludedTerms = ontologyTerms.stream() + .filter(OntologyTerm::isExcluded).toList(); + List excludedLabels = getTranslations(excludedTerms); + if (observedLabels.isEmpty() && excludedLabels.isEmpty()) { + return "keine phänotypischen Abnormalitäten"; // should never happen, actually! + } else if (excludedLabels.isEmpty()) { + return getOxfordCommaList(observedLabels) + ". 
"; + } else if (observedLabels.isEmpty()) { + if (excludedLabels.size() > 1) { + return String.format("Die folgenden Symptome wurden ausgeschlossen %s.", getOxfordCommaList(excludedLabels)); + } else { + return String.format("%s wurde ausgeschlossen.",excludedLabels.get(0)); + } + } else { + String exclusion; + if (excludedLabels.size() == 1) { + exclusion = String.format(", und %s wurde ausgeschlossen.", getOxfordCommaList(excludedLabels)); + } else { + exclusion = String.format(", und %s wurden ausgeschlossen.", getOxfordCommaList(excludedLabels)); + } + return getOxfordCommaList(observedLabels) + exclusion; + } + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktTextGerman.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktTextGerman.java new file mode 100644 index 0000000..23e70a5 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktTextGerman.java @@ -0,0 +1,23 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.german; + +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketTextGenerator; + +public class PpktTextGerman implements PhenopacketTextGenerator { + + @Override + public String QUERY_HEADER() { + return """ +Ich führe ein Experiment mit einem klinischen Fallbericht durch, um zu sehen, wie sich Ihre Diagnosen mit denen menschlicher Experten vergleichen lassen. Ich werde Ihnen einen Teil eines medizinischen Falles vorstellen. Sie versuchen nicht, irgendwelche Patienten zu behandeln. In diesem Fall sind Sie „Dr. GPT-4“, ein KI-Sprachmodell, das eine Diagnose liefert. Hier sind einige Richtlinien. Erstens gibt es eine einzige definitive Diagnose, und es ist eine Diagnose, von der heute bekannt ist, dass sie beim Menschen existiert. Die Diagnose wird fast immer durch einen Gentest bestätigt. 
In seltenen Fällen, in denen ein solcher Test für eine Diagnose nicht existiert, kann die Diagnose jedoch anhand validierter klinischer Kriterien gestellt oder in sehr seltenen Fällen einfach durch eine Expertenmeinung bestätigt werden. Nachdem Sie den Fall gelesen haben, möchte ich, dass Sie eine Differentialdiagnose mit einer Liste von Kandidatendiagnosen stellen, die nach Wahrscheinlichkeit geordnet sind, beginnend mit dem wahrscheinlichsten Kandidaten. Jeder Kandidat sollte mit dem Krankheitsnamen angegeben werden. Wenn es sich bei dem ersten Kandidaten beispielsweise um das Branchiookulofaziale Syndrom und bei dem zweiten um Mukoviszidose handelt, geben Sie Folgendes in englischer Sprache an: + +1. Branchiooculofacial syndrome +2. Cystic fibrosis + +Diese Liste sollte so viele Diagnosen enthalten, wie Sie für sinnvoll halten. Bitte übersetzen Sie diese Liste ins Englische. + +Sie müssen Ihre Argumentation nicht erläutern, sondern nur die Diagnosen auflisten. +Hier ist der Fall: + +"""; + } + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java index 2689760..3843ef4 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java @@ -61,7 +61,7 @@ public String ageAndSexAtLastExamination(PpktIndividual individual) { } else if (m>0) { return String.format("una bebe niña de %d meses", m); } else { - return String.format("una recien nacida %d meses", d); + return String.format("una recien nacida de %d dias de edad", d); } } } else { @@ -369,7 +369,7 @@ private String onsetAvailable(PhenopacketSex psex, PhenopacketAge onsetAge) { // should never happen throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); 
} - return String.format("El paciente se presentó con %s", onsetDescription, onsetDescription); + return String.format("El paciente se presentó con %s", onsetDescription); } private String ageNotAvailable(PhenopacketSex psex) { From d5fdd4a1df91f74545498af9fdb44fc90a1f5c82 Mon Sep 17 00:00:00 2001 From: Kyran Wissink Date: Thu, 16 May 2024 11:10:34 +0200 Subject: [PATCH 12/39] Fixed a logical issue --- .../impl/dutch/PpktIndividualDutch.java | 64 +++++++++---------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java index ae95387..74461f3 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java @@ -48,15 +48,15 @@ public String ageAndSexAtLastExamination(PpktIndividual individual) { int d = isoage.getDays(); if (psex.equals(PhenopacketSex.FEMALE)) { if (y > 17) { - return String.format("een vrouw van %d jaar oud", y); + return String.format("vrouw van %d jaar oud", y); } else if (y > 9) { - return String.format("een adolescente vrouw van %d jaar oud", y); + return String.format("adolescente vrouw van %d jaar oud", y); } else if (y > 0) { - return String.format("een meisje van %d jaar oud", y); + return String.format("meisje van %d jaar oud", y); } else if (m>0) { - return String.format("een baby van %d maanden oud", m); + return String.format("baby van %d maanden oud", m); } else { - return String.format("een pasgeboren vrouwelijke baby van %d dagen oud", d); + return String.format("pasgeboren vrouwelijke baby van %d dagen oud", d); } } } else { @@ -64,33 +64,33 @@ public String ageAndSexAtLastExamination(PpktIndividual individual) { } if (age.isChild()) { return switch (psex) { - case FEMALE -> "een 
meisje"; - case MALE -> "een jongetje"; - default -> "een kind"; // difficult to be gender neutral + case FEMALE -> "meisje"; + case MALE -> "jongetje"; + default -> "kind"; // difficult to be gender neutral }; } else if (age.isCongenital()) { return switch (psex) { - case FEMALE -> "een pasgeboren meisje"; - case MALE -> "een pasgeboren jongetje"; - default -> "een pasgeborene"; + case FEMALE -> "pasgeboren meisje"; + case MALE -> "pasgeboren jongetje"; + default -> "pasgeborene"; }; } else if (age.isFetus()) { return switch (psex) { - case FEMALE -> "een vrouwelijke foetus"; - case MALE -> "een mannelijke foetus"; - default -> "een foetus"; + case FEMALE -> "vrouwelijke foetus"; + case MALE -> "mannelijke foetus"; + default -> "foetus"; }; } else if (age.isInfant()) { return switch (psex) { - case FEMALE -> "een vrouwelijke baby"; - case MALE -> "een mannelijke baby"; - default -> "een baby"; + case FEMALE -> "vrouwelijke baby"; + case MALE -> "mannelijke baby"; + default -> "baby"; }; } else { return switch (psex) { - case FEMALE -> "een vrouw"; - case MALE -> "een man"; - default -> "een volwassene"; + case FEMALE -> "vrouw"; + case MALE -> "man"; + default -> "volwassene"; }; } } @@ -285,15 +285,15 @@ private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8 // if older if (y>17) { return switch (psex) { - case FEMALE -> String.format("een vrouw van %d jaar oud", y); - case MALE -> String.format("een man van %d jaar oud", y); - default -> String.format("een persoon van %d jaar oud", y); + case FEMALE -> String.format("vrouw van %d jaar oud", y); + case MALE -> String.format("man van %d jaar oud", y); + default -> String.format("persoon van %d jaar oud", y); }; } else if (y>9) { return switch (psex) { - case FEMALE -> String.format("een vrouwelijke adolescent van %d jaar oud", y); - case MALE -> String.format("een mannelijke adolescent van %d jaar oud", y); - default -> String.format("een adolescent van %d jaar oud", y); + case FEMALE -> 
String.format("vrouwelijke adolescent van %d jaar oud", y); + case MALE -> String.format("mannelijke adolescent van %d jaar oud", y); + default -> String.format("adolescent van %d jaar oud", y); }; } else if (y>0) { return switch (psex) { @@ -303,15 +303,15 @@ private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8 }; } else if (m>0 || d> 0) { return switch (psex) { - case FEMALE -> String.format("een vrouwelijke baby %s", iso8601ToMonthDay(iso8601Age)); - case MALE -> String.format("een mannelijke baby %s", iso8601ToMonthDay(iso8601Age)); - default -> String.format("een baby %s", iso8601ToMonthDay(iso8601Age)); + case FEMALE -> String.format("vrouwelijke baby %s", iso8601ToMonthDay(iso8601Age)); + case MALE -> String.format("mannelijke baby %s", iso8601ToMonthDay(iso8601Age)); + default -> String.format("baby %s", iso8601ToMonthDay(iso8601Age)); }; } else { return switch (psex) { - case FEMALE -> "een pasgeboren meisje"; - case MALE -> "een pasgeboren jongetje"; - default -> "een pasgeborene"; + case FEMALE -> "pasgeboren meisje"; + case MALE -> "pasgeboren jongetje"; + default -> "pasgeborene"; }; } } From ec78c03f72014e61228cbee484a977a2d6243e45 Mon Sep 17 00:00:00 2001 From: Kyran Wissink Date: Thu, 16 May 2024 12:55:51 +0200 Subject: [PATCH 13/39] Changed the GPT text to the new method --- .../output/impl/dutch/PpktTextDutch.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktTextDutch.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktTextDutch.java index 405c033..c899c8b 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktTextDutch.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktTextDutch.java @@ -7,14 +7,15 @@ public class PpktTextDutch implements PhenopacketTextGenerator { @Override public String QUERY_HEADER() { return 
""" -Ik doe een experiment met een klinisch verslag om te zien hoe jullie diagnoses zich verhouden tot die van menselijke experts. Ik geef je een deel van een medisch geval. Je probeert geen patiënten te behandelen. In dit geval ben je "Dr. GPT-4", een AI-taalmodel dat een diagnose stelt. Hier zijn enkele richtlijnen. Ten eerste is er één definitieve diagnose en dat is een diagnose waarvan bekend is dat die bij mensen bestaat. De diagnose wordt bijna altijd bevestigd door één of andere genetische test, maar in zeldzame gevallen waarin zo'n test niet bestaat voor een diagnose, kan de diagnose in plaats daarvan worden gesteld op basis van gevalideerde klinische criteria of heel zelden gewoon worden bevestigd door de mening van een expert. Nadat je de casus hebt gelezen, wil ik dat je een differentiaaldiagnose stelt met een lijst van mogelijke diagnoses gerangschikt naar waarschijnlijkheid, te beginnen met de meest waarschijnlijke kandidaat. Elke kandidaat moet gespecificeerd worden met de OMIM identifier en de naam van de ziekte. Bijvoorbeeld, als de eerste kandidaat het Branchiooculofaciaal syndroom is en de tweede Cystic fibrosis, geef dan dit: +Ik voer een experiment uit op basis van een klinisch casusrapport om te zien hoe jouw diagnoses zich verhouden tot die van menselijke experts. Ik ga je een deel van een medische casus geven. Je probeert geen patiënten te behandelen. In dit geval ben je “Dr. GPT-4”, een AI-taalmodel dat een diagnose stelt. Hier zijn enkele richtlijnen. Ten eerste bestaat er één definitieve diagnose, en het is een diagnose waarvan tegenwoordig bekend is dat deze ook bij mensen voorkomt. De diagnose wordt bijna altijd bevestigd door een soort genetische test, hoewel in zeldzame gevallen, wanneer een dergelijke test niet bestaat voor een diagnose, de diagnose in plaats daarvan kan worden gesteld op basis van gevalideerde klinische criteria of zeer zelden alleen maar kan worden bevestigd door de mening van deskundigen. 
Nadat je de casus hebt gelezen, wil ik dat je een differentiële diagnose geeft met een lijst met kandidaat-diagnoses, gerangschikt op waarschijnlijkheid, te beginnen met de meest waarschijnlijke kandidaat. Elke kandidaat moet worden gespecificeerd met de ziektenaam. Als de eerste kandidaat bijvoorbeeld het Branchiooculofaciaal syndroom is en de tweede cystische fibrose, geef het dan zo weer: -1. OMIM:113620 - Branchio-oculo-faciaal syndroom -2. OMIM:219700 - Taaislijmziekte +1. Branchio-oculofaciaal syndroom +2. Cystische fibrose Deze lijst moet zoveel diagnoses bevatten als je redelijk acht. -Je hoeft je redenering niet uit te leggen, je hoeft alleen de diagnoses samen met de OMIM-identifiers op te sommen. Dit is het geval: +Je hoeft je redenering niet uit te leggen, vermeld alleen de diagnoses. +Hier is het geval: """; } From 31854314d66b71d433500f13b2ead5bbd20eab39 Mon Sep 17 00:00:00 2001 From: "Leonardo local Kubuntu 22.04" Date: Thu, 16 May 2024 16:49:21 +0200 Subject: [PATCH 14/39] fixed filenaming error, regex was wrong. New version changes any non-word char to underscore. 
Also added a language code in the prompt file name, to avoid identical file names distinguished only by directory --- .../phenopacket2prompt/cmd/GbtTranslateBatchCommand.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java index c70d93d..472eb36 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java @@ -83,8 +83,8 @@ private void outputCorrectResults(List correctResultList) { } - private String getFileName(String phenopacketID) { - return phenopacketID.replaceAll("[^\\w]", phenopacketID).replaceAll("/","_") + "-prompt.txt"; + private String getFileName(String phenopacketID, String languageCode) { + return phenopacketID.replaceAll("[^\\w]","_") + "_" + languageCode + "-prompt.txt"; } @@ -101,7 +101,7 @@ private void outputPromptsInternational(List ppktFiles, Ontology hpo, Stri continue; } PhenopacketDisease pdisease = diseaseList.get(0); - String promptFileName = getFileName( individual.getPhenopacketId()); + String promptFileName = getFileName( individual.getPhenopacketId(), languageCode); String diagnosisLine = String.format("%s\t%s\t%s\t%s", pdisease.getDiseaseId(), pdisease.getLabel(), promptFileName, f.getAbsolutePath()); try { diagnosisList.add(diagnosisLine); @@ -127,7 +127,7 @@ private List outputPromptsEnglish(List ppktFiles, Ontology continue; } PhenopacketDisease pdisease = diseaseList.get(0); - String promptFileName = getFileName( individual.getPhenopacketId()); + String promptFileName = getFileName( individual.getPhenopacketId(), "en"); String diagnosisLine = String.format("%s\t%s\t%s\t%s", pdisease.getDiseaseId(), pdisease.getLabel(), promptFileName, f.getAbsolutePath()); try { String prompt = 
generator.createPrompt(individual); From 21bc283ff5df8d0d07f389586bc8c79fd41f37b4 Mon Sep 17 00:00:00 2001 From: Kyran Wissink Date: Fri, 17 May 2024 12:33:44 +0200 Subject: [PATCH 15/39] Updated new logic to comply with Spanish and German. --- .../output/impl/dutch/PpktTextDutch.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktTextDutch.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktTextDutch.java index c899c8b..82c61e6 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktTextDutch.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktTextDutch.java @@ -7,14 +7,15 @@ public class PpktTextDutch implements PhenopacketTextGenerator { @Override public String QUERY_HEADER() { return """ -Ik voer een experiment uit op basis van een klinisch casusrapport om te zien hoe jouw diagnoses zich verhouden tot die van menselijke experts. Ik ga je een deel van een medische casus geven. Je probeert geen patiënten te behandelen. In dit geval ben je “Dr. GPT-4”, een AI-taalmodel dat een diagnose stelt. Hier zijn enkele richtlijnen. Ten eerste bestaat er één definitieve diagnose, en het is een diagnose waarvan tegenwoordig bekend is dat deze ook bij mensen voorkomt. De diagnose wordt bijna altijd bevestigd door een soort genetische test, hoewel in zeldzame gevallen, wanneer een dergelijke test niet bestaat voor een diagnose, de diagnose in plaats daarvan kan worden gesteld op basis van gevalideerde klinische criteria of zeer zelden alleen maar kan worden bevestigd door de mening van deskundigen. Nadat je de casus hebt gelezen, wil ik dat je een differentiële diagnose geeft met een lijst met kandidaat-diagnoses, gerangschikt op waarschijnlijkheid, te beginnen met de meest waarschijnlijke kandidaat. Elke kandidaat moet worden gespecificeerd met de ziektenaam. 
Als de eerste kandidaat bijvoorbeeld het Branchiooculofaciaal syndroom is en de tweede cystische fibrose, geef het dan zo weer: +Ik voer een experiment uit op basis van een klinisch casusrapport om te zien hoe jouw diagnoses zich verhouden tot die van menselijke experts. Ik ga je een deel van een medische casus geven. Je probeert geen patiënten te behandelen. In dit geval ben je “Dr. GPT-4”, een AI-taalmodel dat een diagnose stelt. Hier zijn enkele richtlijnen. Ten eerste bestaat er één definitieve diagnose, en het is een diagnose waarvan tegenwoordig bekend is dat deze ook bij mensen voorkomt. De diagnose wordt bijna altijd bevestigd door een soort genetische test, hoewel in zeldzame gevallen, wanneer een dergelijke test niet bestaat voor een diagnose, de diagnose in plaats daarvan kan worden gesteld op basis van gevalideerde klinische criteria of zeer zelden alleen maar kan worden bevestigd door de mening van deskundigen. Nadat je de casus hebt gelezen, wil ik dat je een differentiële diagnose geeft met een lijst met kandidaat-diagnoses, gerangschikt op waarschijnlijkheid, te beginnen met de meest waarschijnlijke kandidaat. Elke kandidaat moet worden gespecificeerd met de ziektenaam. Als de eerste kandidaat bijvoorbeeld het Branchio-oculofaciaal syndroom is en de tweede cystische fibrose, geef het dan zo in het Engels weer: -1. Branchio-oculofaciaal syndroom -2. Cystische fibrose +1. Branchiooculofacial syndrome +2. Cystic fibrosis Deze lijst moet zoveel diagnoses bevatten als je redelijk acht. - Je hoeft je redenering niet uit te leggen, vermeld alleen de diagnoses. +Ik heb je deze instructies in het Nederlands gegeven, maar ik zou graag willen dat je je antwoord alleen in het Engels geeft. + Hier is het geval: """; } From 5b902ca62bcd203a5b87a6b8f38f8d67a4f5f728 Mon Sep 17 00:00:00 2001 From: "Leonardo local Kubuntu 22.04" Date: Fri, 17 May 2024 12:36:26 +0200 Subject: [PATCH 16/39] harmonize spanish and german prompt, fixes #12. 
--- .../output/impl/german/PpktTextGerman.java | 5 +++-- .../output/impl/spanish/PpktTextSpanish.java | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktTextGerman.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktTextGerman.java index 23e70a5..8c29c7b 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktTextGerman.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktTextGerman.java @@ -12,9 +12,10 @@ public String QUERY_HEADER() { 1. Branchiooculofacial syndrome 2. Cystic fibrosis -Diese Liste sollte so viele Diagnosen enthalten, wie Sie für sinnvoll halten. Bitte übersetzen Sie diese Liste ins Englische. +Diese Liste sollte so viele Diagnosen enthalten, wie Sie für sinnvoll halten. -Sie müssen Ihre Argumentation nicht erläutern, sondern nur die Diagnosen auflisten. +Sie müssen Ihre Argumentation nicht erläutern, sondern nur die Diagnosen auflisten. +Ich habe Ihnen diese Anleitung auf Deutsch gegeben, aber ich bitte Sie, Ihre Antwort ausschließlich auf Englisch zu liefern. Hier ist der Fall: """; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktTextSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktTextSpanish.java index 2292b1e..62dbea5 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktTextSpanish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktTextSpanish.java @@ -7,10 +7,10 @@ public class PpktTextSpanish implements PhenopacketTextGenerator { @Override public String QUERY_HEADER() { return """ -Estoy realizando un experimento con el informe de un caso clínico para comparar sus diagnósticos con los de expertos humanos. Les voy a dar parte de un caso médico.
No estás intentando tratar a ningún paciente. En este caso, usted es el “Dr. GPT-4”, un modelo de lenguaje de IA que proporciona un diagnóstico. Aquí hay algunas pautas. En primer lugar, existe un único diagnóstico definitivo, y es un diagnóstico que hoy se sabe que existe en humanos. El diagnóstico casi siempre se confirma mediante algún tipo de prueba genética, aunque en casos raros cuando no existe dicha prueba para un diagnóstico, el diagnóstico puede realizarse utilizando criterios clínicos validados o, muy raramente, simplemente confirmado por la opinión de un experto. Después de leer el caso, quiero que haga un diagnóstico diferencial con una lista de diagnósticos candidatos clasificados por probabilidad comenzando con el candidato más probable. Cada candidato debe especificarse con el nombre de la enfermedad. Por ejemplo, si el primer candidato es el síndrome branquiooculofacial y el segundo es la fibrosis quística, proporcione lo siguiente: +Estoy realizando un experimento con el informe de un caso clínico para comparar sus diagnósticos con los de expertos humanos. Les voy a dar parte de un caso médico. No estás intentando tratar a ningún paciente. En este caso, usted es el “Dr. GPT-4”, un modelo de lenguaje de IA que proporciona un diagnóstico. Aquí hay algunas pautas. En primer lugar, existe un único diagnóstico definitivo, y es un diagnóstico que hoy se sabe que existe en humanos. El diagnóstico casi siempre se confirma mediante algún tipo de prueba genética, aunque en casos raros cuando no existe dicha prueba para un diagnóstico, el diagnóstico puede realizarse utilizando criterios clínicos validados o, muy raramente, simplemente confirmado por la opinión de un experto. Después de leer el caso, quiero que haga un diagnóstico diferencial con una lista de diagnósticos candidatos clasificados por probabilidad comenzando con el candidato más probable. Cada candidato debe especificarse con el nombre de la enfermedad. 
Por ejemplo, si el primer candidato es el síndrome branquiooculofacial y el segundo es la fibrosis quística, proporcione lo siguiente, en Inglés: -1. Síndrome branquiooculofacial -2. Fibrosis quística +1. Branchiooculofacial syndrome +2. Cystic fibrosis Esta lista debe proporcionar tantos diagnósticos como considere razonables. From 1c4b4f2574f777c9cb813ff3e6de7fe6d60b8cc3 Mon Sep 17 00:00:00 2001 From: Leonardo macOS Date: Mon, 27 May 2024 15:46:12 +0200 Subject: [PATCH 17/39] first working version of italian, to be polished but close to final --- .../cmd/GbtTranslateBatchCommand.java | 11 +- .../output/PromptGenerator.java | 6 + .../impl/italian/ItalianPromptGenerator.java | 63 +++ .../impl/italian/PpktIndividualItalian.java | 478 ++++++++++++++++++ .../italian/PpktPhenotypicfeatureItalian.java | 90 ++++ .../output/impl/italian/PpktTextItalian.java | 24 + 6 files changed, 669 insertions(+), 3 deletions(-) create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGenerator.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalian.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktPhenotypicfeatureItalian.java create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktTextItalian.java diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java index cef1fc3..d79acda 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java @@ -63,14 +63,19 @@ public Integer call() throws Exception { createDir("prompts"); List correctResultList = outputPromptsEnglish(ppktFiles, hpo); // output all non-English 
languages here + + // SPANISH PromptGenerator spanish = PromptGenerator.spanish(hpo, internationalMap.get("es")); outputPromptsInternational(ppktFiles, hpo, "es", spanish); - + // DUTCH PromptGenerator dutch = PromptGenerator.dutch(hpo, internationalMap.get("nl")); outputPromptsInternational(ppktFiles, hpo, "nl", dutch); - + // GERMAN PromptGenerator german = PromptGenerator.german(hpo, internationalMap.get("de")); outputPromptsInternational(ppktFiles, hpo, "de", german); + // ITALIAN + PromptGenerator italian = PromptGenerator.italian(hpo, internationalMap.get("it")); + outputPromptsInternational(ppktFiles, hpo, "it", italian); // output file with correct diagnosis list outputCorrectResults(correctResultList); @@ -86,7 +91,7 @@ private void outputCorrectResults(List correctResultList) { } catch (IOException e) { e.printStackTrace(); } - System.out.printf("[INFO] Output a total of %d prompts in en and es.\n", correctResultList.size()); + System.out.printf("[INFO] Output a total of %d prompts in en, es, nl, de, and it.\n", correctResultList.size()); } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java index 85ee97c..2db16de 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java @@ -11,6 +11,8 @@ import org.monarchinitiative.phenopacket2prompt.output.impl.german.PpktPhenotypicfeatureGerman; import org.monarchinitiative.phenopacket2prompt.output.impl.spanish.*; import org.monarchinitiative.phenopacket2prompt.output.impl.dutch.*; +import org.monarchinitiative.phenopacket2prompt.output.impl.italian.*; + import java.util.List; import java.util.Map; @@ -46,6 +48,10 @@ static PromptGenerator german(Ontology hpo, HpInternational international) { PpktPhenotypicFeatureGenerator pfgen = new 
PpktPhenotypicfeatureGerman(international); return new GermanPromptGenerator(hpo, pfgen); } + static PromptGenerator italian(Ontology hpo, HpInternational international) { + PpktPhenotypicFeatureGenerator pfgen = new PpktPhenotypicfeatureItalian(international); + return new ItalianPromptGenerator(hpo, pfgen); + } /** diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGenerator.java new file mode 100644 index 0000000..8dae68c --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGenerator.java @@ -0,0 +1,63 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.italian; + +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketIndividualInformationGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketTextGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator; + +import java.util.List; + +public class ItalianPromptGenerator implements PromptGenerator { + + private final Ontology hpo; + + + private final PhenopacketIndividualInformationGenerator ppktAgeSexGenerator; + + private final PhenopacketTextGenerator ppktTextGenerator; + + private final PpktPhenotypicFeatureGenerator ppktPhenotypicFeatureGenerator; + + + + public ItalianPromptGenerator(Ontology hpo, PpktPhenotypicFeatureGenerator pfgen) { + this.hpo = hpo; + ppktAgeSexGenerator = new PpktIndividualItalian(); + 
ppktTextGenerator = new PpktTextItalian(); + this.ppktPhenotypicFeatureGenerator = pfgen; + } + + @Override + public String queryHeader() { + return ppktTextGenerator.QUERY_HEADER(); + } + + @Override + public String getIndividualInformation(PpktIndividual ppktIndividual) { + return this.ppktAgeSexGenerator.getIndividualDescription(ppktIndividual); + } + + @Override + public String formatFeatures(List ontologyTerms) { + return ppktPhenotypicFeatureGenerator.formatFeatures(ontologyTerms); + } + + @Override + public String getVignetteAtAge(PhenopacketAge page, PhenopacketSex psex, List terms) { + String ageString = this.ppktAgeSexGenerator.atAge(page); + String features = formatFeatures(terms); + return String.format("%s, %s è presentato %s", ageString, ppktAgeSexGenerator.heSheIndividual(psex), features); + } + + + + + + + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalian.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalian.java new file mode 100644 index 0000000..3a5b8a4 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalian.java @@ -0,0 +1,478 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.italian; + +import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.monarchinitiative.phenopacket2prompt.model.*; +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketIndividualInformationGenerator; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +public class PpktIndividualItalian implements PhenopacketIndividualInformationGenerator { + + + + private static final String FEMALE_FETUS = "un feto femmina"; + private static final String MALE_FETUS = "un feto maschio"; + private static final String FETUS = "un feto"; + + private static final String FEMALE_NEWBORN = "una neonata femmina"; + private static final String 
MALE_NEWBORN = "un neonato maschio"; + private static final String NEWBORN = "un neonato"; + + private static final String FEMALE_INFANT = "un'infante femmina"; + private static final String MALE_INFANT = "un infante maschio"; + private static final String INFANT = "un infante"; + + private static final String FEMALE_CHILD = "una bambina"; + private static final String MALE_CHILD = "un bambino"; + private static final String CHILD = "un bambino"; + + private static final String FEMALE_ADULT = "una donna"; + private static final String MALE_ADULT = "un uomo"; + private static final String ADULT = "una persona adulta"; + + + /** + * Equivalent of "The clinical + * @param individual + * @return + */ + public String ageAndSexAtOnset(PpktIndividual individual) { + Optional ageOpt = individual.getAgeAtOnset(); + return ""; + } + + + + + public String ageAndSexAtLastExamination(PpktIndividual individual) { + PhenopacketSex psex = individual.getSex(); + Optional ageOpt = individual.getAgeAtLastExamination(); + if (ageOpt.isEmpty()) { + ageOpt = individual.getAgeAtOnset(); + } + String sex; + switch (psex) { + case FEMALE -> sex = "una paziente femmina"; + case MALE -> sex = "un paziente maschio"; + default -> sex = "una persona"; + }; + + if (ageOpt.isEmpty()) { + return sex; + } + PhenopacketAge age = ageOpt.get(); + if (age.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoage = (Iso8601Age) age; + int y = isoage.getYears(); + int m = isoage.getMonths(); + int d = isoage.getDays(); + if (psex.equals(PhenopacketSex.FEMALE)) { + if (y > 17) { + return String.format("una donna di %d anni", y); + } else if (y > 9) { + return String.format("una adolescente di %d anni", y); + + } else if (y > 0) { + return String.format("una bambina di %d anni", y); + } else if (m > 0) { + return String.format("un'infante femmina di %d mesi", m); + } else { + return String.format("una neonata di %d giorni", d); + } + } + } else { + // age is an HPO onset term, we do not 
have an exact date + } + if (age.isChild()) { + return switch (psex) { + case FEMALE -> FEMALE_CHILD; + case MALE -> MALE_CHILD; + default -> CHILD; // difficult to be gender neutral + }; + } else if (age.isCongenital()) { + return switch (psex) { + case FEMALE -> FEMALE_NEWBORN; + case MALE -> MALE_NEWBORN; + default -> NEWBORN; + }; + } else if (age.isFetus()) { + return switch (psex) { + case FEMALE -> FEMALE_FETUS; + case MALE -> MALE_FETUS; + default -> FETUS; + }; + } else if (age.isInfant()) { + return switch (psex) { + case FEMALE -> FEMALE_INFANT; + case MALE -> MALE_INFANT; + default -> INFANT; + }; + } else { + return switch (psex) { + case FEMALE -> FEMALE_ADULT; + case MALE -> MALE_ADULT; + default -> ADULT; + }; + } + } + + + private String atIsoAgeExact(PhenopacketAge ppktAge) { + Iso8601Age iso8601Age = (Iso8601Age) ppktAge; + int y = iso8601Age.getYears(); + int m = iso8601Age.getMonths(); + int d = iso8601Age.getDays(); + + if (y > 10) { + return String.format("%d anni", y); + } else if (y > 0) { + if (m > 1) { + return String.format("%d anni e %d mesi", y, m); + } else if (m == 1) { + return String.format("%d anni e un mese", y); + } else { + return String.format("%d anni", y); + } + } else if (m>0) { + return String.format("%d mesi e %d giorni", m, d); + } else { + return String.format("%d giorni", d); + } + } + + + @Override + public String getIndividualDescription(PpktIndividual individual) { + Optional lastExamOpt = individual.getAgeAtLastExamination(); + Optional onsetOpt = individual.getAgeAtOnset(); + PhenopacketSex psex = individual.getSex(); + if (lastExamOpt.isPresent() && onsetOpt.isPresent()) { + return onsetAndLastEncounterAvailable(psex, lastExamOpt.get(), onsetOpt.get()); + } else if (lastExamOpt.isPresent()) { + return lastEncounterAvailable(psex, lastExamOpt.get()); + } else if (onsetOpt.isPresent()) { + return onsetAvailable(psex, onsetOpt.get()); + } else { + return ageNotAvailable(psex); + } + } + + + private String 
iso8601ToYearMonth(Iso8601Age iso8601Age) { + if (iso8601Age.getMonths() == 0) { + return String.format("di %d anni", iso8601Age.getYears()); + } else { + return String.format("di %d anni e %d mesi", iso8601Age.getYears(), iso8601Age.getMonths()); + } + } + + private String iso8601ToMonthDay(Iso8601Age iso8601Age) { + int m = iso8601Age.getMonths(); + int d = iso8601Age.getDays(); + if (m == 0) { + return String.format("di %d giorni", d); + } else if (d>0){ + return String.format("di %d mesi e %d giorni", m, d); + } else { + return String.format("di %d mesi", m); + } + } + + /** + * Create a phrase such as "at the age of 7 years, 4 months, and 2 days" + * Leave out the months and days if they are zero. + * @param isoAge + * @return + */ + private String iso8601AtAgeOf(Iso8601Age isoAge) { + List components = new ArrayList<>(); + + if (isoAge.getYears()>1) { + components.add(String.format("%d anni", isoAge.getYears())); + } else if (isoAge.getYears() == 1) { + components.add("1 anno"); + } + if (isoAge.getMonths() > 1) { + components.add(String.format("%d mesi", isoAge.getMonths())); + } else if (isoAge.getMonths() == 1) { + components.add("1 mese"); + } + if (isoAge.getDays()>1) { + components.add(String.format("%d giorni", isoAge.getDays())); + } else if (isoAge.getDays()==1) { + components.add("1 giorno"); + } + if (components.isEmpty()) { + return "nel periodo neonatale"; + } else if (components.size() == 1) { + return "all'età di " + components.get(0); + } else if (components.size() == 2) { + return "all'età di " + components.get(0) + " e " + components.get(1); + } else { + return "all'età di " + components.get(0) + ", " + components.get(1) + + " e " + components.get(2); + } + } + + private String onsetTermAtAgeOf(HpoOnsetAge hpoOnsetTermAge) { + if (hpoOnsetTermAge.isFetus()) { + return "nel periodo fetale"; + } else if (hpoOnsetTermAge.isCongenital()) { + return "nel periodo neonatale"; + } else if (hpoOnsetTermAge.isInfant()) { + return "nel periodo 
infantile"; // unsure, to be checked + } else if (hpoOnsetTermAge.isChild()) { + return "da bambino"; // check + } else if (hpoOnsetTermAge.isJuvenile()) { + return "nell'adolescenza"; + } else { + return "in età adulta"; + } + } + + + private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8601Age) { + int y = iso8601Age.getYears(); + int m = iso8601Age.getMonths(); + int d = iso8601Age.getDays(); + // if older + if (y>17) { + return switch (psex) { + case FEMALE -> String.format("una donna di %d anni", y); + case MALE -> String.format("un uomo di %d anni", y); + default -> String.format("una persona di %d anni", y); + }; + } else if (y>9) { + return switch (psex) { + case FEMALE -> String.format("un'adolescente di %d anni", y); + case MALE -> String.format("un adolescente di %d anni", y); + default -> String.format("un adolescente di %d anni", y); + }; + } else if (y>0) { + return switch (psex) { + case FEMALE -> String.format("bambina %s", iso8601ToYearMonth(iso8601Age)); + case MALE -> String.format("bambino %s", iso8601ToYearMonth(iso8601Age)); + default -> String.format("bambino %s", iso8601ToYearMonth(iso8601Age)); + }; + } else if (m>0 || d> 0) { + return switch (psex) { + case FEMALE -> String.format("una infante %s", iso8601ToMonthDay(iso8601Age)); + case MALE -> String.format("un infante %s", iso8601ToMonthDay(iso8601Age)); + default -> String.format("un infante %s", iso8601ToMonthDay(iso8601Age)); + }; + } else { + return switch (psex) { + case FEMALE -> "neonata"; + case MALE -> "neonato"; + default -> "neonato"; + }; + } + } + + private String hpoOnsetIndividualDescription(PhenopacketSex psex, HpoOnsetAge hpoOnsetTermAge) { + if (hpoOnsetTermAge.isFetus()) { + return switch (psex) { + case FEMALE -> "feto femmina"; + case MALE -> "feto maschio"; + default -> "feto"; + }; + } else if (hpoOnsetTermAge.isCongenital()) { + return switch (psex) { + case FEMALE -> "una neonata"; + case MALE -> "un neonato"; + default -> "un neonato"; 
+ }; + } else if (hpoOnsetTermAge.isInfant()) { + return switch (psex) { + case FEMALE -> FEMALE_INFANT; + case MALE -> "un infante maschio"; + default -> "un infante"; + }; + } else if (hpoOnsetTermAge.isChild()) { + return switch (psex) { + case FEMALE -> "bambina"; + case MALE -> "bambino"; + default -> "bambino"; + }; + } else if (hpoOnsetTermAge.isJuvenile()) { + return switch (psex) { + case FEMALE -> "un'adolescente femmina"; + case MALE -> "un adolescente maschio"; + default -> "un adolescente"; + }; + }else { + return switch (psex) { + case FEMALE -> "una donna"; + case MALE -> "un uomo"; + default -> "un adulto"; + }; + } + } + + /** + * A sentence such as The proband was a 39-year old woman who presented at the age of 12 years with + * HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. This method returns the phrase that ends with "with" + * El sujeto era un niño de 1 año y 10 meses que se presentó como recién nacido con un filtrum largo. + * @param psex + * @param lastExamAge + * @param onsetAge + * @return + */ + private String onsetAndLastEncounterAvailable(PhenopacketSex psex, PhenopacketAge lastExamAge, PhenopacketAge onsetAge) { + String individualDescription; + String onsetDescription; + if (lastExamAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoAge = (Iso8601Age) lastExamAge; + individualDescription = iso8601individualDescription(psex, isoAge); + } else if (lastExamAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) lastExamAge; + individualDescription = hpoOnsetIndividualDescription(psex,hpoOnsetTermAge); + } else { + // should never happen + throw new PhenolRuntimeException("Did not recognize last exam age type " + lastExamAge.ageType()); + } + if (onsetAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoAge = (Iso8601Age) onsetAge; + onsetDescription = iso8601AtAgeOf(isoAge); + } else if 
(onsetAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) onsetAge; + onsetDescription = onsetTermAtAgeOf(hpoOnsetTermAge); + } else { + // should never happen + throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); + } + return String.format("Il soggetto era %s che si è presentato %s con", individualDescription, onsetDescription); + } + + + /** + * Age at last examination available but age of onset not available + * The proband was a 39-year old woman who presented with HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. + * @param psex + * @param lastExamAge + */ + private String lastEncounterAvailable(PhenopacketSex psex, PhenopacketAge lastExamAge) { + String individualDescription; + if (lastExamAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoAge = (Iso8601Age) lastExamAge; + individualDescription = iso8601individualDescription(psex, isoAge); + } else if (lastExamAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) lastExamAge; + individualDescription = hpoOnsetIndividualDescription(psex,hpoOnsetTermAge); + } else { + // should never happen + throw new PhenolRuntimeException("Did not recognize last exam age type " + lastExamAge.ageType()); + } + return String.format("Il paziente era %s che si è presentato con", individualDescription); + } + + /** + * Age at last examination not available but age of onset available + * The proband presented at the age of 12 years with HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. 
+ * @param psex + * @param onsetAge + * @return + */ + private String onsetAvailable(PhenopacketSex psex, PhenopacketAge onsetAge) { + String onsetDescription; + if (onsetAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoAge = (Iso8601Age) onsetAge; + onsetDescription = iso8601AtAgeOf(isoAge); + } else if (onsetAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) onsetAge; + onsetDescription = onsetTermAtAgeOf(hpoOnsetTermAge); + } else { + // should never happen + throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); + } + return String.format("Il paziente si è presentato con %s", onsetDescription); + } + + private String ageNotAvailable(PhenopacketSex psex) { + return switch (psex) { + case FEMALE -> "La paziente si è presentata con"; + case MALE -> "Il paziente si è presentato con"; + default -> "Il paziente si è presentato con"; + }; + } + + @Override + public String heSheIndividual(PhenopacketSex psex) { + return switch (psex) { + case FEMALE -> "lui"; + case MALE -> "lei"; + default -> "la persona"; + }; + } + + @Override + public String atAge(PhenopacketAge ppktAge) { + if (ppktAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + return "All'età di " + atIsoAgeExact(ppktAge); + } else if (ppktAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + String label = ppktAge.age(); // something like "Infantile onset" + return switch (label) { + case "Infantile onset" -> "Durante il periodo infantile"; + case "Childhood onset" -> "Durante l'infanzia"; + case "Neonatal onset" -> "Durante il periodo neonatale"; + case "Congenital onset" -> "Alla nascita"; + case "Adult onset" -> "Da adulto"; + default-> String.format("Durante il %s periodo", label.replace(" onset", "")); + }; + } else { + return ""; // should never get here + } + } + + // @Override + public String ppktSex(PpktIndividual individual) { + PhenopacketSex psex = 
individual.getSex(); + Optional ageOpt = individual.getAgeAtLastExamination(); + if (ageOpt.isEmpty()) { + ageOpt = individual.getAgeAtOnset(); + } + if (ageOpt.isEmpty()) { + return switch (psex) { + case FEMALE -> FEMALE_ADULT; + case MALE -> MALE_ADULT; + default -> ADULT; + }; + } + PhenopacketAge age = ageOpt.get();; + if (age.isChild()) { + return switch (psex) { + case FEMALE -> FEMALE_CHILD; + case MALE -> MALE_CHILD; + default -> CHILD; + }; + } else if (age.isCongenital()) { + return switch (psex) { + case FEMALE -> FEMALE_NEWBORN; + case MALE -> MALE_NEWBORN; + default -> NEWBORN; + }; + } else if (age.isFetus()) { + return switch (psex) { + case FEMALE -> FEMALE_FETUS; + case MALE -> MALE_FETUS; + default -> FETUS; + }; + } else if (age.isInfant()) { + return switch (psex) { + case FEMALE -> FEMALE_INFANT; + case MALE -> MALE_INFANT; + default -> INFANT; + }; + } else { + return switch (psex) { + case FEMALE -> FEMALE_ADULT; + case MALE -> MALE_ADULT; + default -> ADULT; + }; + } + } + + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktPhenotypicfeatureItalian.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktPhenotypicfeatureItalian.java new file mode 100644 index 0000000..2e0f187 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktPhenotypicfeatureItalian.java @@ -0,0 +1,90 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.italian; + +import org.monarchinitiative.phenopacket2prompt.international.HpInternational; +import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; +import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.function.Predicate; + +public class PpktPhenotypicfeatureItalian implements PpktPhenotypicFeatureGenerator { + + private 
final HpInternational italian; + + public PpktPhenotypicfeatureItalian(HpInternational international) { + italian = international; + } + + + + private List getTranslations(List ontologyTerms) { + List labels = new ArrayList<>(); + for (var term: ontologyTerms) { + Optional opt = italian.getLabel(term.getTid()); + if (opt.isPresent()) { + labels.add(opt.get()); + } else { + System.err.printf("[ERROR] Could not find %s translation for %s (%s).\n", italian.getLanguageAcronym(), term.getLabel(), term.getTid().getValue()); + } + } + return labels; + } + + + private final Set vowels = Set.of('A', 'E', 'I', 'O', 'U'); + + private String getOxfordCommaList(List items) { + if (items.size() == 1) { + return items.get(0); + } + if (items.size() == 2) { + // no comma if we just have two items. + // one item will work with the below code + return String.join(" and ", items); + } + String symList = String.join(", ", items); + int jj = symList.lastIndexOf(", "); + if (jj > 0) { + String end = symList.substring(jj+2); + if (vowels.contains(end.charAt(0))) { + symList = symList.substring(0, jj) + " e " + end; + } else { + symList = symList.substring(0, jj) + " e " + end; + } + } + return symList; + } + + @Override + public String formatFeatures(List ontologyTerms) { + List observedTerms = ontologyTerms.stream() + .filter(Predicate.not(OntologyTerm::isExcluded)) + .toList(); + List observedLabels = getTranslations(observedTerms); + List excludedTerms = ontologyTerms.stream() + .filter(OntologyTerm::isExcluded).toList(); + List excludedLabels = getTranslations(excludedTerms); + if (observedLabels.isEmpty() && excludedLabels.isEmpty()) { + return "nessuna anomalia fenotipica"; // should never happen, actually! + } else if (excludedLabels.isEmpty()) { + return getOxfordCommaList(observedLabels) + ". 
"; + } else if (observedLabels.isEmpty()) { + if (excludedLabels.size() > 1) { + return String.format("E' stata esclusa la presenza dei seguenti sintomi: %s.", getOxfordCommaList(excludedLabels)); + } else { + return String.format("E' stata esclusa la presenza del seguente sintomo: %s.",excludedLabels.get(0)); + } + } else { + String exclusion; + if (excludedLabels.size() == 1) { + exclusion = String.format(" ed è stata esclusa la presenza di %s.", getOxfordCommaList(excludedLabels)); + } else { + exclusion = String.format(" ed è stata esclusa la presenza di %s.", getOxfordCommaList(excludedLabels)); + } + return getOxfordCommaList(observedLabels) + exclusion; + } + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktTextItalian.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktTextItalian.java new file mode 100644 index 0000000..36e5a72 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktTextItalian.java @@ -0,0 +1,24 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.italian; + +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketTextGenerator; + +public class PpktTextItalian implements PhenopacketTextGenerator { + + @Override + public String QUERY_HEADER() { + return """ +Sto conducendo un esperimento riguardo a un caso clinico per confrontare le tue diagnosi con quelle di esperti umani. Ti darò una parte di un caso medico. Non stai cercando di curare alcun paziente. In questo caso, sei il "Dr. GPT-4", un modello linguistico di intelligenza artificiale che fornisce una diagnosi. Ecco alcune linee guida. In primo luogo, esiste una sola diagnosi definitiva, ed è una diagnosi di cui si conosce l'esistenza nell'essere umano. 
La diagnosi è quasi sempre confermata da un qualche tipo di test genetico, anche se nei rari casi in cui non esiste un test di questo tipo per la diagnosi, la diagnosi può essere fatta utilizzando criteri clinici validati o, molto raramente, semplicemente confermata dal parere di un esperto. Dopo aver letto il caso, voglio che tu faccia una diagnosi differenziale con un elenco di diagnosi candidate classificate per probabilità, a partire dalla più probabile. Ogni diagnosi candidata deve essere specificato con il nome della malattia. Per esempio, se il primo candidato è la sindrome branchiooculofacciale e il secondo è la fibrosi cistica, fornisci quanto segue, in inglese: + +1. Branchiooculofacial syndrome +2. Cystic fibrosis + +L'elenco deve contenere il numero di diagnosi che ritieni ragionevole. + +Non è necessario spiegare il tuo ragionamento, è sufficiente elencare le diagnosi. +Ti sto fornendo queste istruzioni in italiano, ma voglio che tu fornisca la totalità delle tue risposte in inglese. +Ecco il caso: + +"""; + } + +} From fd083c38b0352a19e4509c466f2468d2d340bf7f Mon Sep 17 00:00:00 2001 From: Leonardo macOS Date: Mon, 27 May 2024 19:30:57 +0200 Subject: [PATCH 18/39] minor linguistic bug fixes in it and es, e.g. 
'the patient presented at an age with a symptom' instead of 'with at an age a symptom --- .../impl/italian/PpktIndividualItalian.java | 49 ++++++++++--------- .../impl/spanish/PpktIndividualSpanish.java | 47 +++++++++++++----- 2 files changed, 60 insertions(+), 36 deletions(-) diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalian.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalian.java index 3a5b8a4..8e90f13 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalian.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalian.java @@ -11,7 +11,6 @@ public class PpktIndividualItalian implements PhenopacketIndividualInformationGenerator { - private static final String FEMALE_FETUS = "un feto femmina"; private static final String MALE_FETUS = "un feto maschio"; private static final String FETUS = "un feto"; @@ -28,6 +27,10 @@ public class PpktIndividualItalian implements PhenopacketIndividualInformationGe private static final String MALE_CHILD = "un bambino"; private static final String CHILD = "un bambino"; + private static final String FEMALE_ADOLESCENT = "un'adolescente femmina"; + private static final String MALE_ADOLESCENT = "un adolescente maschio"; + private static final String ADOLESCENT = "un adolescente"; + private static final String FEMALE_ADULT = "una donna"; private static final String MALE_ADULT = "un uomo"; private static final String ADULT = "una persona adulta"; @@ -264,9 +267,9 @@ private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8 }; } else { return switch (psex) { - case FEMALE -> "neonata"; - case MALE -> "neonato"; - default -> "neonato"; + case FEMALE -> FEMALE_NEWBORN; + case MALE -> MALE_NEWBORN; + default -> NEWBORN; }; } } @@ -274,39 +277,39 @@ private String iso8601individualDescription(PhenopacketSex psex, 
Iso8601Age iso8 private String hpoOnsetIndividualDescription(PhenopacketSex psex, HpoOnsetAge hpoOnsetTermAge) { if (hpoOnsetTermAge.isFetus()) { return switch (psex) { - case FEMALE -> "feto femmina"; - case MALE -> "feto maschio"; - default -> "feto"; + case FEMALE -> FEMALE_FETUS; + case MALE -> MALE_FETUS; + default -> FETUS; }; } else if (hpoOnsetTermAge.isCongenital()) { return switch (psex) { - case FEMALE -> "una neonata"; - case MALE -> "un neonato"; - default -> "un neonato"; + case FEMALE -> FEMALE_NEWBORN; + case MALE -> MALE_NEWBORN; + default -> NEWBORN; }; } else if (hpoOnsetTermAge.isInfant()) { return switch (psex) { case FEMALE -> FEMALE_INFANT; - case MALE -> "un infante maschio"; - default -> "un infante"; + case MALE -> MALE_INFANT; + default -> INFANT; }; } else if (hpoOnsetTermAge.isChild()) { return switch (psex) { - case FEMALE -> "bambina"; - case MALE -> "bambino"; - default -> "bambino"; + case FEMALE -> FEMALE_CHILD; + case MALE -> MALE_CHILD; + default -> CHILD; }; } else if (hpoOnsetTermAge.isJuvenile()) { return switch (psex) { - case FEMALE -> "un'adolescente femmina"; - case MALE -> "un adolescente maschio"; - default -> "un adolescente"; + case FEMALE -> FEMALE_ADOLESCENT; + case MALE -> MALE_ADOLESCENT; + default -> ADOLESCENT; }; }else { return switch (psex) { - case FEMALE -> "una donna"; - case MALE -> "un uomo"; - default -> "un adulto"; + case FEMALE -> FEMALE_ADULT; + case MALE -> MALE_ADULT; + default -> ADULT; }; } } @@ -365,7 +368,7 @@ private String lastEncounterAvailable(PhenopacketSex psex, PhenopacketAge lastEx // should never happen throw new PhenolRuntimeException("Did not recognize last exam age type " + lastExamAge.ageType()); } - return String.format("Il paziente era %s che si è presentato con", individualDescription); + return String.format("Il paziente era %s che si è presentato ", individualDescription); } /** @@ -387,7 +390,7 @@ private String onsetAvailable(PhenopacketSex psex, PhenopacketAge onsetAge) { // 
should never happen throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); } - return String.format("Il paziente si è presentato con %s", onsetDescription); + return String.format("Il paziente si è presentato %s con", onsetDescription); } private String ageNotAvailable(PhenopacketSex psex) { diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java index 6f5970b..62ec899 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java @@ -10,9 +10,30 @@ public class PpktIndividualSpanish implements PhenopacketIndividualInformationGenerator { + //TODO translate from ita to spanish and edit this file in order to actually use these + private static final String FEMALE_FETUS = "un feto femenino"; + private static final String MALE_FETUS = "un feto masculino"; + private static final String FETUS = "un feto"; + + private static final String FEMALE_NEWBORN = "una niña recién nacida"; // CHECK + private static final String MALE_NEWBORN = "un neonato maschio"; + private static final String NEWBORN = "un neonato"; private static final String FEMALE_INFANT = "un bebé femenino"; + private static final String MALE_INFANT = "un bebé masculino"; + private static final String INFANT = "un bebé"; + + private static final String FEMALE_CHILD = "una bambina"; + private static final String MALE_CHILD = "un bambino"; + private static final String CHILD = "un bambino"; + + private static final String FEMALE_ADOLESCENT = "un'adolescente femmina"; + private static final String MALE_ADOLESCENT = "un adolescente maschio"; + private static final String ADOLESCENT = "un adolescente"; + private static final String FEMALE_ADULT = "una donna"; + 
private static final String MALE_ADULT = "un uomo"; + private static final String ADULT = "una persona adulta"; /** @@ -81,19 +102,19 @@ public String ageAndSexAtLastExamination(PpktIndividual individual) { }; } else if (age.isFetus()) { return switch (psex) { - case FEMALE -> "un feto femenino"; - case MALE -> "un feto masculino"; - default -> "un feto"; + case FEMALE -> FEMALE_FETUS; + case MALE -> MALE_FETUS; + default -> FETUS; }; } else if (age.isInfant()) { return switch (psex) { case FEMALE -> FEMALE_INFANT; - case MALE -> "un bebé masculino"; - default -> "un bebé"; + case MALE -> MALE_INFANT; + default -> INFANT; }; } else { return switch (psex) { - case FEMALE -> "un mujer"; + case FEMALE -> "una mujer"; case MALE -> "un hombre"; default -> "una persona adulta"; }; @@ -256,21 +277,21 @@ private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8 private String hpoOnsetIndividualDescription(PhenopacketSex psex, HpoOnsetAge hpoOnsetTermAge) { if (hpoOnsetTermAge.isFetus()) { return switch (psex) { - case FEMALE -> FEMALE_INFANT; - case MALE -> "feto masculino"; - default -> "feto"; + case FEMALE -> FEMALE_FETUS; + case MALE -> MALE_FETUS; + default -> FETUS; }; } else if (hpoOnsetTermAge.isCongenital()) { return switch (psex) { - case FEMALE -> "una niña recién nacida"; + case FEMALE -> "una niña recién nacida"; case MALE -> "un niño recién nacido"; default -> "un bebe recién nacido"; }; } else if (hpoOnsetTermAge.isInfant()) { return switch (psex) { case FEMALE -> FEMALE_INFANT; - case MALE -> "un bebé masculino"; - default -> "un bebé"; + case MALE -> MALE_INFANT; + default -> INFANT; }; } else if (hpoOnsetTermAge.isChild()) { return switch (psex) { @@ -369,7 +390,7 @@ private String onsetAvailable(PhenopacketSex psex, PhenopacketAge onsetAge) { // should never happen throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); } - return String.format("El paciente se presentó con %s", onsetDescription); 
+ return String.format("El paciente se presentó %s con", onsetDescription); } private String ageNotAvailable(PhenopacketSex psex) { From f2cde6505bf8e8ebe547fcd8c0d54be62bfa7391 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Tue, 28 May 2024 16:35:49 +0200 Subject: [PATCH 19/39] param. testing --- pom.xml | 4 +- .../cmd/GbtTranslateBatchCommand.java | 4 +- .../cmd/GptTranslateCommand.java | 2 +- .../model/PpktIndividual.java | 19 +++-- .../impl/english/PpktIndividualEnglish.java | 8 +-- .../model/PpktIndividualTest.java | 2 +- .../output/PPKtIndividualBase.java | 50 +++++++++++++ .../english/PpktIndividualEnglishTest.java | 72 +++++++++++++++++++ 8 files changed, 147 insertions(+), 14 deletions(-) create mode 100644 src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java create mode 100644 src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java diff --git a/pom.xml b/pom.xml index cd43cac..97153d4 100644 --- a/pom.xml +++ b/pom.xml @@ -186,8 +186,8 @@ maven-compiler-plugin 3.8.1 - ${java.version} - ${java.version} + 21 + 21 diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java index cef1fc3..a79a0b4 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java @@ -101,7 +101,7 @@ private void outputPromptsInternational(List ppktFiles, Ontology hpo, Stri createDir(dirpath); List diagnosisList = new ArrayList<>(); for (var f: ppktFiles) { - PpktIndividual individual = new PpktIndividual(f); + PpktIndividual individual = PpktIndividual.fromFile(f); List diseaseList = individual.getDiseases(); if (diseaseList.size() != 1) { System.err.printf("[ERROR] Got %d diseases for %s.\n", diseaseList.size(), 
individual.getPhenopacketId()); @@ -127,7 +127,7 @@ private List outputPromptsEnglish(List ppktFiles, Ontology PromptGenerator generator = PromptGenerator.english(hpo); for (var f: ppktFiles) { - PpktIndividual individual = new PpktIndividual(f); + PpktIndividual individual = PpktIndividual.fromFile(f); List diseaseList = individual.getDiseases(); if (diseaseList.size() != 1) { System.err.printf("[ERROR] Got %d diseases for %s.\n", diseaseList.size(), individual.getPhenopacketId()); diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java index 9dc8728..6efb83c 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java @@ -55,7 +55,7 @@ public Integer call() throws Exception { System.out.println(hpo.version().orElse("n/a")); PromptGenerator generator = PromptGenerator.english(hpo); - PpktIndividual individual = new PpktIndividual(new File(ppkt)); + PpktIndividual individual = PpktIndividual.fromFile(new File(ppkt)); String prompt = generator.createPrompt(individual); System.out.println(prompt); // SPANISH diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividual.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividual.java index a00aff6..ab5dc0b 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividual.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividual.java @@ -17,14 +17,19 @@ import java.util.*; public class PpktIndividual { - final Logger LOGGER = LoggerFactory.getLogger(PpktIndividual.class); + private static final Logger LOGGER = LoggerFactory.getLogger(PpktIndividual.class); private final Phenopacket ppkt; private final String phenopacketId; - public PpktIndividual(File ppktJsonFile) { + 
public PpktIndividual(Phenopacket ppkt) { + this.ppkt = ppkt; + this.phenopacketId = ppkt.getId(); + } + + public static PpktIndividual fromFile(File ppktJsonFile) { JSONParser parser = new JSONParser(); try { Object obj = parser.parse(new FileReader(ppktJsonFile)); @@ -32,14 +37,20 @@ public PpktIndividual(File ppktJsonFile) { String phenopacketJsonString = jsonObject.toJSONString(); Phenopacket.Builder phenoPacketBuilder = Phenopacket.newBuilder(); JsonFormat.parser().merge(phenopacketJsonString, phenoPacketBuilder); - this.ppkt = phenoPacketBuilder.build(); + Phenopacket ppkt = phenoPacketBuilder.build(); + return new PpktIndividual(ppkt); } catch (IOException | ParseException e1) { LOGGER.error("Could not ingest phenopacket: {}", e1.getMessage()); throw new PhenolRuntimeException("Could not load phenopacket at " + ppktJsonFile); } - this.phenopacketId = ppkt.getId(); } + public static PpktIndividual fromPhenopacket(Phenopacket ppkt) { + return new PpktIndividual(ppkt); + } + + + public String getPhenopacketId() { return phenopacketId; } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglish.java index 75bac66..f1d66c8 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglish.java @@ -104,7 +104,7 @@ private String onsetTermAtAgeOf(HpoOnsetAge hpoOnsetTermAge) { if (hpoOnsetTermAge.isFetus()) { return "in the fetal period"; } else if (hpoOnsetTermAge.isCongenital()) { - return "as a newborn"; + return "at birth"; } else if (hpoOnsetTermAge.isInfant()) { return "as an infant"; } else if (hpoOnsetTermAge.isChild()) { @@ -142,9 +142,9 @@ private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8 }; } else if (m>0 || d> 0) { return 
switch (psex) { - case FEMALE -> String.format("%s baby girl", iso8601ToMonthDay(iso8601Age)); - case MALE -> String.format("\"%s baby boy", iso8601ToMonthDay(iso8601Age)); - default -> String.format("%s baby", iso8601ToMonthDay(iso8601Age)); + case FEMALE -> String.format("%s female infant", iso8601ToMonthDay(iso8601Age)); + case MALE -> String.format("%s male infant", iso8601ToMonthDay(iso8601Age)); + default -> String.format("%s infant", iso8601ToMonthDay(iso8601Age)); }; } else { return switch (psex) { diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividualTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividualTest.java index efbb8b6..0bc9242 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividualTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividualTest.java @@ -22,7 +22,7 @@ public class PpktIndividualTest { private static final ClassLoader classLoader = PpktIndividualTest.class.getClassLoader(); private static final URL resource = (classLoader.getResource(ppktPath)); private static final File file = new File(resource.getFile()); - private static final PpktIndividual ppktIndividual = new PpktIndividual(file); + private static final PpktIndividual ppktIndividual = PpktIndividual.fromFile(file); @Test diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java new file mode 100644 index 0000000..eb6949a --- /dev/null +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java @@ -0,0 +1,50 @@ +package org.monarchinitiative.phenopacket2prompt.output; + +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; +import org.phenopackets.phenopackettools.builder.builders.DiseaseBuilder; +import 
org.phenopackets.phenopackettools.builder.builders.IndividualBuilder; +import org.phenopackets.phenopackettools.builder.builders.MetaDataBuilder; +import org.phenopackets.phenopackettools.builder.builders.TimeElements; +import org.phenopackets.schema.v2.core.Disease; +import org.phenopackets.schema.v2.core.Individual; +import org.phenopackets.schema.v2.core.MetaData; + +import java.util.function.Supplier; +import java.util.stream.Stream; + +public class PPKtIndividualBase { + private final static MetaData metadata = MetaDataBuilder.builder("curator").build(); + + public sealed interface TestOutcome { + record Success(String value) implements TestOutcome {} + record Failure(Supplier exceptionSupplier) implements TestOutcome {} + } + + public record TestCase(String description, PpktIndividual ppktIndividual, TestOutcome expectedOutcome) {} + + + + + + + public static PpktIndividual female46yearsInfantileOnset() { + PhenopacketBuilder builder = PhenopacketBuilder.create("id1", metadata); + Disease d = DiseaseBuilder.builder("OMIM:100123", "test").onset(TimeElements.infantileOnset()).build(); + Individual subject = IndividualBuilder.builder("individual.1").female().ageAtLastEncounter("P46Y").build(); + builder.individual(subject).addDisease(d); + return new PpktIndividual(builder.build()); + } + + public static PpktIndividual male4monthsCongenitalOnset() { + PhenopacketBuilder builder = PhenopacketBuilder.create("id2", metadata); + Disease d = DiseaseBuilder.builder("OMIM:100123", "test").onset(TimeElements.congenitalOnset()).build(); + Individual subject = IndividualBuilder.builder("individual.2").male().ageAtLastEncounter("P4M").build(); + builder.individual(subject).addDisease(d); + return new PpktIndividual(builder.build()); + } + + + + +} diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java 
new file mode 100644 index 0000000..4e95708 --- /dev/null +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java @@ -0,0 +1,72 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.english; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; +import org.monarchinitiative.phenopacket2prompt.model.Iso8601Age; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketIndividualInformationGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase; + +import java.util.function.Supplier; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +public class PpktIndividualEnglishTest extends PPKtIndividualBase{ + + + + private static Stream provideExpressionsForEvaluate() { + return Stream.of( + new TestCase("46 year olf female, infantile onset", + female46yearsInfantileOnset(), new TestOutcome.Success("The proband was a 46-year old woman who presented as an infant with")), + new TestCase("male 4 months, congenital onset", + male4monthsCongenitalOnset(), new TestOutcome.Success("The proband was a 4-month old male infant who presented at birth with")) + ); + } + + + + + @ParameterizedTest + @MethodSource("provideExpressionsForEvaluate") + void testEvaluateExpression(TestCase testCase) { + PhenopacketIndividualInformationGenerator generator = new PpktIndividualEnglish(); + PpktIndividual ppkti = testCase.ppktIndividual(); + switch (testCase.expectedOutcome()) { + case TestOutcome.Success(String expectedResult) -> + assertEquals(expectedResult, generator.getIndividualDescription(ppkti), + "Incorrect evaluation for: " + 
testCase.description()); + case TestOutcome.Failure(Supplier exceptionSupplier) -> + assertThrows(exceptionSupplier.get().getClass(), + () -> generator.getIndividualDescription(ppkti), + "Incorrect error handling for: " + testCase.description()); + } + } + + + + @Test + public void test1() { + PpktIndividual ppkti = PPKtIndividualBase.female46yearsInfantileOnset(); + PhenopacketIndividualInformationGenerator generator = new PpktIndividualEnglish(); + String expected = "she"; + assertEquals(expected, generator.heSheIndividual(ppkti.getSex())); + String expectedDescription = "The proband was a 46-year old woman who presented as an infant with"; + assertEquals(expectedDescription, generator.getIndividualDescription(ppkti)); + String expectedAtAge = "3"; + + } + + @Test + public void testIsoAge() { + PhenopacketAge age = new Iso8601Age("P46Y"); + PhenopacketIndividualInformationGenerator generator = new PpktIndividualEnglish(); + assertEquals("P46Y", generator.atAge(age)); + } + + +} From 0fef5308813f502a229cef42ca799b9aed669733 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Wed, 29 May 2024 15:02:25 +0200 Subject: [PATCH 20/39] adding testing for age & sex (English) --- .../phenopacket2prompt/model/HpoOnsetAge.java | 43 +++++++-- .../phenopacket2prompt/model/Iso8601Age.java | 10 +- ....java => PPKtIndividualInfoGenerator.java} | 2 +- .../impl/dutch/DutchPromptGenerator.java | 6 +- .../impl/dutch/PpktIndividualDutch.java | 4 +- .../impl/english/EnglishPromptGenerator.java | 2 +- .../impl/english/PpktIndividualEnglish.java | 27 +++--- .../impl/german/GermanPromptGenerator.java | 4 +- .../impl/german/PpktIndividualGerman.java | 4 +- .../impl/spanish/PpktIndividualSpanish.java | 4 +- .../impl/spanish/SpanishPromptGenerator.java | 2 +- .../output/PPKtIndividualBase.java | 23 ++++- .../english/PpktIndividualEnglishTest.java | 91 ++++++++++++++----- 13 files changed, 156 insertions(+), 66 deletions(-) rename 
src/main/java/org/monarchinitiative/phenopacket2prompt/output/{PhenopacketIndividualInformationGenerator.java => PPKtIndividualInfoGenerator.java} (87%) diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/HpoOnsetAge.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/HpoOnsetAge.java index b8a487f..09f8ca9 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/HpoOnsetAge.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/HpoOnsetAge.java @@ -14,31 +14,38 @@ public class HpoOnsetAge implements PhenopacketAge { private final int totalDays; - /** One of Antenatal onset HP:0030674; Fetal onset HP:0011461; Late first trimester onset HP:0034199; - * Third trimester onset HP:0034197; Second trimester onset HP:0034198; Embryonal onset HP:0011460*/ - private final static Set fetalIds = Set.of(TermId.of(" HP:0030674"), TermId.of("HP:0011461"), TermId.of("HP:0034199"), + /** + * One of Antenatal onset HP:0030674; Fetal onset HP:0011461; Late first trimester onset HP:0034199; + * Third trimester onset HP:0034197; Second trimester onset HP:0034198; Embryonal onset HP:0011460 + */ + private final static Set fetalIds = Set.of(TermId.of(" HP:0030674"), TermId.of("HP:0011461"), TermId.of("HP:0034199"), TermId.of("HP:0034197"), TermId.of("HP:0034198"), TermId.of("HP:0011460*")); - /** Childhood onset */ + /** + * Childhood onset + */ private final static TermId childhoodOnset = TermId.of("HP:0011463"); private final static TermId juvenileOnset = TermId.of("HP:0003621"); - /** Infantile onset */ + /** + * Infantile onset + */ private final static TermId infantileOnset = TermId.of("HP:0003593"); - /** Congenital onset */ + /** + * Congenital onset + */ private final static TermId congenitalOnset = TermId.of("HP:0003577"); - public HpoOnsetAge(String id, String label) { this.tid = TermId.of(id); this.label = label; Optional opt = HpoOnset.fromTermId(tid); if (opt.isPresent()) { HpoOnset onset = opt.get(); - 
totalDays = (int) ( onset.start().days() /2+ onset.end().days()/2); + totalDays = (int) (onset.start().days() / 2 + onset.end().days() / 2); } else { totalDays = Integer.MAX_VALUE; } @@ -90,4 +97,24 @@ public int totalDays() { public TermId getTid() { return tid; } + + + public static HpoOnsetAge childhood() { + return new HpoOnsetAge(childhoodOnset.getValue(), "Childhood onset"); + } + + public static HpoOnsetAge juvenile() { + return new HpoOnsetAge(juvenileOnset.getValue(), "Juvenile onset"); + } + + + public static HpoOnsetAge infantile() { + return new HpoOnsetAge(infantileOnset.getValue(), "Infantile onset"); + } + + + public static HpoOnsetAge congenital() { + return new HpoOnsetAge(congenitalOnset.getValue(), "Congenital onset"); + } + } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java index 34e5781..8ffe9f9 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java @@ -57,12 +57,14 @@ public int getDays() { @Override public String age() { StringBuilder sb = new StringBuilder(); - if (years > 0) { - return String.format("%d year-old", years); + if (years == 1) { + return "one year"; + } else if (years > 1) { + return String.format("%d years", years); } else if (months > 0) { - return String.format("%d month-old", months); + return String.format("%d months", months); } else { - return String.format("%d day-old", days); + return String.format("%d days", days); } } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketIndividualInformationGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualInfoGenerator.java similarity index 87% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketIndividualInformationGenerator.java rename to 
src/main/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualInfoGenerator.java index 7506bb1..86a8414 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketIndividualInformationGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualInfoGenerator.java @@ -4,7 +4,7 @@ import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; -public interface PhenopacketIndividualInformationGenerator { +public interface PPKtIndividualInfoGenerator { String getIndividualDescription(PpktIndividual individual); diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java index 110c5a5..af00045 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java @@ -5,12 +5,10 @@ import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; -import org.monarchinitiative.phenopacket2prompt.output.PhenopacketIndividualInformationGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; import org.monarchinitiative.phenopacket2prompt.output.PhenopacketTextGenerator; import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator; -import org.monarchinitiative.phenopacket2prompt.output.impl.dutch.PpktIndividualDutch; -import org.monarchinitiative.phenopacket2prompt.output.impl.dutch.PpktTextDutch; import java.util.List; @@ -19,7 +17,7 @@ public class 
DutchPromptGenerator implements PromptGenerator { private final Ontology hpo; - private final PhenopacketIndividualInformationGenerator ppktAgeSexGenerator; + private final PPKtIndividualInfoGenerator ppktAgeSexGenerator; private final PhenopacketTextGenerator ppktTextGenerator; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java index 74461f3..ac0f553 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java @@ -2,13 +2,13 @@ import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenopacket2prompt.model.*; -import org.monarchinitiative.phenopacket2prompt.output.PhenopacketIndividualInformationGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; import java.util.ArrayList; import java.util.List; import java.util.Optional; -public class PpktIndividualDutch implements PhenopacketIndividualInformationGenerator { +public class PpktIndividualDutch implements PPKtIndividualInfoGenerator { /** diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGenerator.java index 905bd5e..3218182 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGenerator.java @@ -13,7 +13,7 @@ public class EnglishPromptGenerator implements PromptGenerator { private final Ontology hpo; - private final PhenopacketIndividualInformationGenerator ppktAgeGenerator; + private final PPKtIndividualInfoGenerator 
ppktAgeGenerator; private final PhenopacketTextGenerator ppktTextGenerator; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglish.java index f1d66c8..d8cd917 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglish.java @@ -2,13 +2,13 @@ import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenopacket2prompt.model.*; -import org.monarchinitiative.phenopacket2prompt.output.PhenopacketIndividualInformationGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; import java.util.ArrayList; import java.util.List; import java.util.Optional; -public class PpktIndividualEnglish implements PhenopacketIndividualInformationGenerator { +public class PpktIndividualEnglish implements PPKtIndividualInfoGenerator { public PpktIndividualEnglish() { @@ -60,7 +60,7 @@ private String iso8601ToMonthDay(Iso8601Age iso8601Age) { } else if (d>0){ return String.format("%d-month, %d-day old", m, d); } else { - return String.format("%d-month old", m, d); + return String.format("%d-month old", m); } } @@ -73,20 +73,17 @@ private String iso8601ToMonthDay(Iso8601Age iso8601Age) { private String iso8601AtAgeOf(Iso8601Age isoAge) { List components = new ArrayList<>(); - if (isoAge.getYears()>1) { - components.add(String.format("%d years", isoAge.getYears())); - } else if (isoAge.getYears() == 1) { - components.add("1 year"); + if (isoAge.getYears()>0) { + String ystring = isoAge.getYears() == 1 ? 
"year" : "years"; + components.add(String.format("%d %s", isoAge.getYears(), ystring)); } - if (isoAge.getMonths() > 1) { - components.add(String.format("%d months", isoAge.getMonths())); - } else if (isoAge.getMonths() == 1) { - components.add("1 month"); + if (isoAge.getMonths() > 0) { + String mstring = isoAge.getMonths() == 1 ? "month" : "months"; + components.add(String.format("%d %s", isoAge.getMonths(), mstring)); } - if (isoAge.getDays()>1) { - components.add(String.format("%d days", isoAge.getDays())); - } else if (isoAge.getDays()==1) { - components.add("1 day"); + if (isoAge.getDays()>0) { + String dstring = isoAge.getDays() == 1 ? "day" : "days"; + components.add(String.format("%d %s", isoAge.getDays(), dstring)); } if (components.isEmpty()) { return "as a newborn"; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java index 509ef4f..741939d 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java @@ -5,7 +5,7 @@ import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; -import org.monarchinitiative.phenopacket2prompt.output.PhenopacketIndividualInformationGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; import org.monarchinitiative.phenopacket2prompt.output.PhenopacketTextGenerator; import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator; @@ -17,7 +17,7 @@ public class GermanPromptGenerator implements PromptGenerator { private final 
Ontology hpo; - private final PhenopacketIndividualInformationGenerator ppktAgeSexGenerator; + private final PPKtIndividualInfoGenerator ppktAgeSexGenerator; private final PhenopacketTextGenerator ppktTextGenerator; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java index 22333ee..7536850 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java @@ -2,13 +2,13 @@ import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenopacket2prompt.model.*; -import org.monarchinitiative.phenopacket2prompt.output.PhenopacketIndividualInformationGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; import java.util.ArrayList; import java.util.List; import java.util.Optional; -public class PpktIndividualGerman implements PhenopacketIndividualInformationGenerator { +public class PpktIndividualGerman implements PPKtIndividualInfoGenerator { private static final String FEMALE_INFANT = "ein weiblicher Säugling"; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java index 6f5970b..6a4758e 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java @@ -2,13 +2,13 @@ import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenopacket2prompt.model.*; -import 
org.monarchinitiative.phenopacket2prompt.output.PhenopacketIndividualInformationGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; import java.util.ArrayList; import java.util.List; import java.util.Optional; -public class PpktIndividualSpanish implements PhenopacketIndividualInformationGenerator { +public class PpktIndividualSpanish implements PPKtIndividualInfoGenerator { private static final String FEMALE_INFANT = "un bebé femenino"; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java index ee48aee..1e626f8 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java @@ -14,7 +14,7 @@ public class SpanishPromptGenerator implements PromptGenerator { private final Ontology hpo; - private final PhenopacketIndividualInformationGenerator ppktAgeSexGenerator; + private final PPKtIndividualInfoGenerator ppktAgeSexGenerator; private final PhenopacketTextGenerator ppktTextGenerator; diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java index eb6949a..0a7a4c9 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java @@ -1,5 +1,9 @@ package org.monarchinitiative.phenopacket2prompt.output; +import org.monarchinitiative.phenopacket2prompt.model.Iso8601Age; +import org.monarchinitiative.phenopacket2prompt.model.HpoOnsetAge; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; +import 
org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; import org.phenopackets.phenopackettools.builder.builders.DiseaseBuilder; @@ -11,7 +15,6 @@ import org.phenopackets.schema.v2.core.MetaData; import java.util.function.Supplier; -import java.util.stream.Stream; public class PPKtIndividualBase { private final static MetaData metadata = MetaDataBuilder.builder("curator").build(); @@ -21,10 +24,13 @@ record Success(String value) implements TestOutcome {} record Failure(Supplier exceptionSupplier) implements TestOutcome {} } - public record TestCase(String description, PpktIndividual ppktIndividual, TestOutcome expectedOutcome) {} + public record TestIdvlDescription(String description, PpktIndividual ppktIndividual, TestOutcome expectedOutcome) {} + public record TestIdvlHeShe(String description, PhenopacketSex ppktSex, TestOutcome expectedOutcome) {} + public record TestIdvlAtAge(String description, PhenopacketAge ppktAge, TestOutcome expectedOutcome) {} + @@ -47,4 +53,17 @@ public static PpktIndividual male4monthsCongenitalOnset() { + + + + public static PhenopacketAge congenital = HpoOnsetAge.congenital(); + public static PhenopacketAge infantile = HpoOnsetAge.infantile(); + public static PhenopacketAge juvenile = HpoOnsetAge.juvenile(); + public static PhenopacketAge childhood = HpoOnsetAge.childhood(); + public static PhenopacketAge p46y = new Iso8601Age("P46Y"); + + + + + } diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java index 4e95708..752a468 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java +++ 
b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java @@ -5,8 +5,9 @@ import org.junit.jupiter.params.provider.MethodSource; import org.monarchinitiative.phenopacket2prompt.model.Iso8601Age; import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; -import org.monarchinitiative.phenopacket2prompt.output.PhenopacketIndividualInformationGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase; import java.util.function.Supplier; @@ -19,22 +20,21 @@ public class PpktIndividualEnglishTest extends PPKtIndividualBase{ - private static Stream provideExpressionsForEvaluate() { + private static Stream testGetIndividualDescription() { return Stream.of( - new TestCase("46 year olf female, infantile onset", + new TestIdvlDescription("46 year olf female, infantile onset", female46yearsInfantileOnset(), new TestOutcome.Success("The proband was a 46-year old woman who presented as an infant with")), - new TestCase("male 4 months, congenital onset", + new TestIdvlDescription("male 4 months, congenital onset", male4monthsCongenitalOnset(), new TestOutcome.Success("The proband was a 4-month old male infant who presented at birth with")) ); } - @ParameterizedTest - @MethodSource("provideExpressionsForEvaluate") - void testEvaluateExpression(TestCase testCase) { - PhenopacketIndividualInformationGenerator generator = new PpktIndividualEnglish(); + @MethodSource("testGetIndividualDescription") + void testEvaluateExpression(TestIdvlDescription testCase) { + PPKtIndividualInfoGenerator generator = new PpktIndividualEnglish(); PpktIndividual ppkti = testCase.ppktIndividual(); switch (testCase.expectedOutcome()) { case TestOutcome.Success(String expectedResult) -> @@ -49,24 
+49,71 @@ void testEvaluateExpression(TestCase testCase) { - @Test - public void test1() { - PpktIndividual ppkti = PPKtIndividualBase.female46yearsInfantileOnset(); - PhenopacketIndividualInformationGenerator generator = new PpktIndividualEnglish(); - String expected = "she"; - assertEquals(expected, generator.heSheIndividual(ppkti.getSex())); - String expectedDescription = "The proband was a 46-year old woman who presented as an infant with"; - assertEquals(expectedDescription, generator.getIndividualDescription(ppkti)); - String expectedAtAge = "3"; + private static Stream testGetPPKtSex() { + return Stream.of( + new TestIdvlHeShe("female", + PhenopacketSex.FEMALE, new TestOutcome.Success("she")), + new TestIdvlHeShe("male", + PhenopacketSex.MALE, new TestOutcome.Success("he")), + new TestIdvlHeShe("proband", + PhenopacketSex.UNKNOWN, new TestOutcome.Success("the individual")) + ); + } + @ParameterizedTest + @MethodSource("testGetPPKtSex") + void testPPKtSex(TestIdvlHeShe testCase) { + PPKtIndividualInfoGenerator generator = new PpktIndividualEnglish(); + switch (testCase.expectedOutcome()) { + case TestOutcome.Success(String expectedResult) -> + assertEquals(expectedResult, generator.heSheIndividual(testCase.ppktSex())); + case TestOutcome.Failure(Supplier exceptionSupplier) -> + assertThrows(exceptionSupplier.get().getClass(), + () -> generator.heSheIndividual(testCase.ppktSex()), + "Incorrect error handling for: " + testCase.description()); + } } - @Test - public void testIsoAge() { - PhenopacketAge age = new Iso8601Age("P46Y"); - PhenopacketIndividualInformationGenerator generator = new PpktIndividualEnglish(); - assertEquals("P46Y", generator.atAge(age)); + + +//public record TestIdvlAtAge(String description, PhenopacketAge ppktAge, TestOutcome expectedOutcome) {} + + + + + private static Stream testIndlAtAge() { + return Stream.of( + new TestIdvlAtAge("congenital", + congenital, new TestOutcome.Success("At birth")), + new TestIdvlAtAge("infantile", + 
infantile, new TestOutcome.Success("During the infantile period")), + new TestIdvlAtAge("childhood age", + childhood, new TestOutcome.Success("During childhood")), + new TestIdvlAtAge("46 years old", + p46y, new TestOutcome.Success("At an age of 46 years")) + ); } + @ParameterizedTest + @MethodSource("testIndlAtAge") + void testPPKtSex(TestIdvlAtAge testCase) { + PPKtIndividualInfoGenerator generator = new PpktIndividualEnglish(); + switch (testCase.expectedOutcome()) { + case TestOutcome.Success(String expectedResult) -> + assertEquals(expectedResult, generator.atAge(testCase.ppktAge())); + case TestOutcome.Failure(Supplier exceptionSupplier) -> + assertThrows(exceptionSupplier.get().getClass(), + () -> generator.atAge(testCase.ppktAge()), + "Incorrect error handling for: " + testCase.description()); + } + + + } + + + + + + } From 5778ae0d5e7fc484dcc45eaa45362def02dcff02 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Thu, 30 May 2024 13:29:41 +0200 Subject: [PATCH 21/39] updating testing --- .../model/AgeNotSpecified.java | 18 +++++++++- .../phenopacket2prompt/model/HpoOnsetAge.java | 15 ++++++++- .../phenopacket2prompt/model/Iso8601Age.java | 17 +++++++++- .../model/PhenopacketAge.java | 3 +- .../PpktPhenotypicFeatureGenerator.java | 12 ++++++- .../english/PpktPhenotypicfeatureEnglish.java | 15 +++++++++ .../spanish/PpktPhenotypicfeatureSpanish.java | 3 +- .../output/PPKtIndividualBase.java | 4 +-- .../english/EnglishPromptGeneratorTest.java | 4 +++ .../english/PpktIndividualEnglishTest.java | 33 +++++++++---------- 10 files changed, 98 insertions(+), 26 deletions(-) create mode 100644 src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGeneratorTest.java diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/AgeNotSpecified.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/AgeNotSpecified.java index e94e62e..e2e9951 100644 --- 
a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/AgeNotSpecified.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/AgeNotSpecified.java @@ -1,6 +1,8 @@ package org.monarchinitiative.phenopacket2prompt.model; -public class AgeNotSpecified implements PhenopacketAge { +import java.util.Objects; + +public final class AgeNotSpecified implements PhenopacketAge { @Override public String age() { return ""; @@ -43,4 +45,18 @@ public int totalDays() { @Override public boolean specified() {return false; } + + @Override + public int hashCode() { + return Objects.hashCode(totalDays()); + } + + @Override + public boolean equals(Object obj) { + if (! (obj instanceof PhenopacketAge)) return false; + PhenopacketAge iso = (PhenopacketAge) obj; + return iso.totalDays() == totalDays(); + } + + } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/HpoOnsetAge.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/HpoOnsetAge.java index 09f8ca9..c89ea38 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/HpoOnsetAge.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/HpoOnsetAge.java @@ -3,10 +3,11 @@ import org.monarchinitiative.phenol.annotations.formats.hpo.HpoOnset; import org.monarchinitiative.phenol.ontology.data.TermId; +import java.util.Objects; import java.util.Optional; import java.util.Set; -public class HpoOnsetAge implements PhenopacketAge { +public final class HpoOnsetAge implements PhenopacketAge { private final TermId tid; private final String label; @@ -117,4 +118,16 @@ public static HpoOnsetAge congenital() { return new HpoOnsetAge(congenitalOnset.getValue(), "Congenital onset"); } + @Override + public int hashCode() { + return Objects.hashCode(totalDays()); + } + + @Override + public boolean equals(Object obj) { + if (! 
(obj instanceof PhenopacketAge)) return false; + PhenopacketAge iso = (PhenopacketAge) obj; + return iso.totalDays() == totalDays(); + } + } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java index 8ffe9f9..96f20fb 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java @@ -2,7 +2,9 @@ import org.monarchinitiative.phenol.base.PhenolRuntimeException; -public class Iso8601Age implements PhenopacketAge { +import java.util.Objects; + +public final class Iso8601Age implements PhenopacketAge { private final String iso8601; @@ -105,4 +107,17 @@ public boolean isCongenital() { public int totalDays() { return totalDays; } + + + @Override + public int hashCode() { + return Objects.hashCode(totalDays()); + } + + @Override + public boolean equals(Object obj) { + if (! 
(obj instanceof PhenopacketAge)) return false; + PhenopacketAge iso = (PhenopacketAge) obj; + return iso.totalDays() == totalDays(); + } } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PhenopacketAge.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PhenopacketAge.java index 32c0902..94c8498 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PhenopacketAge.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PhenopacketAge.java @@ -1,6 +1,6 @@ package org.monarchinitiative.phenopacket2prompt.model; -public interface PhenopacketAge { +public sealed interface PhenopacketAge permits AgeNotSpecified, HpoOnsetAge, Iso8601Age { String age(); PhenopacketAgeType ageType(); @@ -18,4 +18,5 @@ public interface PhenopacketAge { int totalDays(); default boolean specified() {return true; } + } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java index 3f2b24a..da7ed93 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java @@ -6,8 +6,18 @@ public interface PpktPhenotypicFeatureGenerator { - + // Please let's use the two functions below instead! 
+ //@Deprecated(forRemoval = true) String formatFeatures( List ontologyTerms); + default String formatObservedFeatures(List oterms) { + return ""; // TODO implement in each implementing class, using defualt for now so as not to break code + } + + default String formatExcludedFeatures(List oterms) { + return ""; // TODO implement in each implementing class, using defualt for now so as not to break code + } + + } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicfeatureEnglish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicfeatureEnglish.java index 7bc0503..908c840 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicfeatureEnglish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicfeatureEnglish.java @@ -49,4 +49,19 @@ public String formatFeatures(List ontologyTerms) { return getOxfordCommaList(observed) + ", whereby " + exclusion; } } + + + @Override + public String formatObservedFeatures(List oterms) { + List observed = oterms.stream() + .filter(Predicate.not(OntologyTerm::isExcluded)) + .map(OntologyTerm::getLabel).toList(); + return getOxfordCommaList(observed); + + } + + @Override + public String formatExcludedFeatures(List oterms) { + return PpktPhenotypicFeatureGenerator.super.formatExcludedFeatures(oterms); + } } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java index 0be3121..435d772 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java @@ -1,5 +1,6 @@ package 
org.monarchinitiative.phenopacket2prompt.output.impl.spanish; +import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenopacket2prompt.international.HpInternational; import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; @@ -68,7 +69,7 @@ public String formatFeatures(List ontologyTerms) { .filter(OntologyTerm::isExcluded).toList(); List excludedLabels = getTranslations(excludedTerms); if (observedLabels.isEmpty() && excludedLabels.isEmpty()) { - return "no phenotypic abnormalities"; // should never happen, actually! + throw new PhenolRuntimeException("No phenotypic abnormalities"); // should never happen, actually! } else if (excludedLabels.isEmpty()) { return getOxfordCommaList(observedLabels) + ". "; } else if (observedLabels.isEmpty()) { diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java index 0a7a4c9..b342e01 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java @@ -20,8 +20,8 @@ public class PPKtIndividualBase { private final static MetaData metadata = MetaDataBuilder.builder("curator").build(); public sealed interface TestOutcome { - record Success(String value) implements TestOutcome {} - record Failure(Supplier exceptionSupplier) implements TestOutcome {} + record Ok(String value) implements TestOutcome {} + record Error(Supplier exceptionSupplier) implements TestOutcome {} } public record TestIdvlDescription(String description, PpktIndividual ppktIndividual, TestOutcome expectedOutcome) {} diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGeneratorTest.java 
b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGeneratorTest.java new file mode 100644 index 0000000..4cb5cfc --- /dev/null +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGeneratorTest.java @@ -0,0 +1,4 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.english; + +public class EnglishPromptGeneratorTest { +} diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java index 752a468..38a8b5c 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java @@ -1,10 +1,7 @@ package org.monarchinitiative.phenopacket2prompt.output.impl.english; -import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; -import org.monarchinitiative.phenopacket2prompt.model.Iso8601Age; -import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; @@ -23,9 +20,9 @@ public class PpktIndividualEnglishTest extends PPKtIndividualBase{ private static Stream testGetIndividualDescription() { return Stream.of( new TestIdvlDescription("46 year olf female, infantile onset", - female46yearsInfantileOnset(), new TestOutcome.Success("The proband was a 46-year old woman who presented as an infant with")), + female46yearsInfantileOnset(), new TestOutcome.Ok("The proband was a 46-year old woman who presented as an infant with")), new TestIdvlDescription("male 4 
months, congenital onset", - male4monthsCongenitalOnset(), new TestOutcome.Success("The proband was a 4-month old male infant who presented at birth with")) + male4monthsCongenitalOnset(), new TestOutcome.Ok("The proband was a 4-month old male infant who presented at birth with")) ); } @@ -37,10 +34,10 @@ void testEvaluateExpression(TestIdvlDescription testCase) { PPKtIndividualInfoGenerator generator = new PpktIndividualEnglish(); PpktIndividual ppkti = testCase.ppktIndividual(); switch (testCase.expectedOutcome()) { - case TestOutcome.Success(String expectedResult) -> + case TestOutcome.Ok(String expectedResult) -> assertEquals(expectedResult, generator.getIndividualDescription(ppkti), "Incorrect evaluation for: " + testCase.description()); - case TestOutcome.Failure(Supplier exceptionSupplier) -> + case TestOutcome.Error(Supplier exceptionSupplier) -> assertThrows(exceptionSupplier.get().getClass(), () -> generator.getIndividualDescription(ppkti), "Incorrect error handling for: " + testCase.description()); @@ -52,11 +49,11 @@ void testEvaluateExpression(TestIdvlDescription testCase) { private static Stream testGetPPKtSex() { return Stream.of( new TestIdvlHeShe("female", - PhenopacketSex.FEMALE, new TestOutcome.Success("she")), + PhenopacketSex.FEMALE, new TestOutcome.Ok("she")), new TestIdvlHeShe("male", - PhenopacketSex.MALE, new TestOutcome.Success("he")), + PhenopacketSex.MALE, new TestOutcome.Ok("he")), new TestIdvlHeShe("proband", - PhenopacketSex.UNKNOWN, new TestOutcome.Success("the individual")) + PhenopacketSex.UNKNOWN, new TestOutcome.Ok("the individual")) ); } @@ -65,9 +62,9 @@ private static Stream testGetPPKtSex() { void testPPKtSex(TestIdvlHeShe testCase) { PPKtIndividualInfoGenerator generator = new PpktIndividualEnglish(); switch (testCase.expectedOutcome()) { - case TestOutcome.Success(String expectedResult) -> + case TestOutcome.Ok(String expectedResult) -> assertEquals(expectedResult, generator.heSheIndividual(testCase.ppktSex())); - case 
TestOutcome.Failure(Supplier exceptionSupplier) -> + case TestOutcome.Error(Supplier exceptionSupplier) -> assertThrows(exceptionSupplier.get().getClass(), () -> generator.heSheIndividual(testCase.ppktSex()), "Incorrect error handling for: " + testCase.description()); @@ -84,13 +81,13 @@ void testPPKtSex(TestIdvlHeShe testCase) { private static Stream testIndlAtAge() { return Stream.of( new TestIdvlAtAge("congenital", - congenital, new TestOutcome.Success("At birth")), + congenital, new TestOutcome.Ok("At birth")), new TestIdvlAtAge("infantile", - infantile, new TestOutcome.Success("During the infantile period")), + infantile, new TestOutcome.Ok("During the infantile period")), new TestIdvlAtAge("childhood age", - childhood, new TestOutcome.Success("During childhood")), + childhood, new TestOutcome.Ok("During childhood")), new TestIdvlAtAge("46 years old", - p46y, new TestOutcome.Success("At an age of 46 years")) + p46y, new TestOutcome.Ok("At an age of 46 years")) ); } @@ -100,9 +97,9 @@ private static Stream testIndlAtAge() { void testPPKtSex(TestIdvlAtAge testCase) { PPKtIndividualInfoGenerator generator = new PpktIndividualEnglish(); switch (testCase.expectedOutcome()) { - case TestOutcome.Success(String expectedResult) -> + case TestOutcome.Ok(String expectedResult) -> assertEquals(expectedResult, generator.atAge(testCase.ppktAge())); - case TestOutcome.Failure(Supplier exceptionSupplier) -> + case TestOutcome.Error(Supplier exceptionSupplier) -> assertThrows(exceptionSupplier.get().getClass(), () -> generator.atAge(testCase.ppktAge()), "Incorrect error handling for: " + testCase.description()); From ed575404e57d1a5b79bdf598a0b59de43b39f009 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Thu, 30 May 2024 14:52:27 +0200 Subject: [PATCH 22/39] refactoring testing --- pom.xml | 2 +- .../cmd/GbtTranslateBatchCommand.java | 33 +++++++-- .../cmd/GptTranslateCommand.java | 2 +- .../HpInternationalOboParser.java | 6 +- .../output/PhenopacketTextGenerator.java | 25 
------- .../PpktPhenotypicFeatureGenerator.java | 24 +++++-- .../output/PromptGenerator.java | 9 ++- .../impl/dutch/DutchPromptGenerator.java | 6 ++ .../dutch/PpktPhenotypicfeatureDutch.java | 15 +++-- .../impl/english/EnglishPromptGenerator.java | 7 +- .../english/PpktPhenotypicFeatureEnglish.java | 50 ++++++++++++++ .../english/PpktPhenotypicfeatureEnglish.java | 67 ------------------- .../impl/german/GermanPromptGenerator.java | 6 +- .../german/PpktPhenotypicfeatureGerman.java | 23 ++++--- .../spanish/PpktPhenotypicfeatureSpanish.java | 20 +++--- .../impl/spanish/SpanishPromptGenerator.java | 5 +- .../output/PPKtIndividualBase.java | 21 ++++-- .../english/EnglishPromptGeneratorTest.java | 6 ++ .../english/PpktIndividualEnglishTest.java | 8 +-- .../PpktPhenotypicFeatureEnglishTest.java | 49 ++++++++++++++ 20 files changed, 231 insertions(+), 153 deletions(-) create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglish.java delete mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicfeatureEnglish.java create mode 100644 src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglishTest.java diff --git a/pom.xml b/pom.xml index 97153d4..4a329b1 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.monarchinitiative phenopacket2prompt - 0.3.14 + 0.3.15 phenopacket2prompt https://github.com/monarch-initiative/phenopacket2prompt diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java index a79a0b4..a46f030 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java @@ -23,6 +23,7 @@ import java.util.ArrayList; import java.util.List; import 
java.util.Map; +import java.util.Set; import java.util.concurrent.Callable; @CommandLine.Command(name = "batch", aliases = {"B"}, @@ -43,6 +44,9 @@ public class GbtTranslateBatchCommand implements Callable { @CommandLine.Option(names = {"-d", "--dir"}, description = "Path to directory with JSON phenopacket files", required = true) private String ppktDir; + private String currentLanguageCode = null; + private int currentCount; + @Override public Integer call() throws Exception { File hpJsonFile = new File(hpoJsonPath); @@ -64,19 +68,28 @@ public Integer call() throws Exception { List correctResultList = outputPromptsEnglish(ppktFiles, hpo); // output all non-English languages here PromptGenerator spanish = PromptGenerator.spanish(hpo, internationalMap.get("es")); + resetOutput("es"); outputPromptsInternational(ppktFiles, hpo, "es", spanish); - + resetOutput("nl"); PromptGenerator dutch = PromptGenerator.dutch(hpo, internationalMap.get("nl")); outputPromptsInternational(ppktFiles, hpo, "nl", dutch); - + resetOutput("de"); PromptGenerator german = PromptGenerator.german(hpo, internationalMap.get("de")); outputPromptsInternational(ppktFiles, hpo, "de", german); - + resetOutput("finished"); // output file with correct diagnosis list outputCorrectResults(correctResultList); return 0; } + private void resetOutput(String es) { + if (currentLanguageCode != null) { + System.out.printf("Finished writing %d phenopackets in %s\n", currentCount, currentLanguageCode); + } + currentLanguageCode = es; + currentCount = 0; + } + private void outputCorrectResults(List correctResultList) { File outfile = new File("prompts" + File.separator + "correct_results.tsv"); try (BufferedWriter bw = new BufferedWriter(new FileWriter(outfile))) { @@ -115,7 +128,14 @@ private void outputPromptsInternational(List ppktFiles, Ontology hpo, Stri String prompt = generator.createPrompt(individual); outputPrompt(prompt, promptFileName, dirpath); } catch (Exception e) { - e.printStackTrace(); + 
System.err.printf("[ERROR] Could not process %s: %s\n", promptFileName, e.getMessage()); + //e.printStackTrace(); + } + } + Set missing = generator.getMissingTranslations(); + if (! missing.isEmpty()) { + for (var m : missing) { + System.out.printf("[%s] Missing: %s\n", languageCode, m); } } } @@ -124,7 +144,7 @@ private void outputPromptsInternational(List ppktFiles, Ontology hpo, Stri private List outputPromptsEnglish(List ppktFiles, Ontology hpo) { createDir("prompts/en"); List correctResultList = new ArrayList<>(); - PromptGenerator generator = PromptGenerator.english(hpo); + PromptGenerator generator = PromptGenerator.english(); for (var f: ppktFiles) { PpktIndividual individual = PpktIndividual.fromFile(f); @@ -157,7 +177,8 @@ private void outputPrompt(String prompt, String promptFileName, String dir) { } catch (IOException e) { e.printStackTrace(); } - System.out.print("."); + System.out.printf("%s %d.\r", currentLanguageCode, currentCount); + currentCount++; } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java index 6efb83c..f91d085 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java @@ -54,7 +54,7 @@ public Integer call() throws Exception { System.out.println(hpo.version().orElse("n/a")); - PromptGenerator generator = PromptGenerator.english(hpo); + PromptGenerator generator = PromptGenerator.english(); PpktIndividual individual = PpktIndividual.fromFile(new File(ppkt)); String prompt = generator.createPrompt(individual); System.out.println(prompt); diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java index 4931986..7a35d1a 100644 --- 
a/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java @@ -86,7 +86,7 @@ public HpInternationalOboParser(File file) { String language = opt.get(); languageToInternationalMap.get(language).addTerm(currentHpoTermId, hpoLabel); } else { - System.err.printf("[ERROR] Could not extract language for %s.", line); + System.err.printf("[ERROR] Could not extract language for %s.\n", line); } } @@ -98,13 +98,13 @@ public HpInternationalOboParser(File file) { } catch (IOException e) { e.printStackTrace(); } - for (String language : languageToInternationalMap.keySet()) { + /*for (String language : languageToInternationalMap.keySet()) { System.out.println(language); HpInternational international = languageToInternationalMap.get(language); for (var entry : international.getTermIdToLabelMap().entrySet()) { System.out.printf("\t%s: %s\n", entry.getKey().getValue(), entry.getValue()); } - } + }*/ } public Map getLanguageToInternationalMap() { diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketTextGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketTextGenerator.java index b850365..250b66c 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketTextGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketTextGenerator.java @@ -1,7 +1,5 @@ package org.monarchinitiative.phenopacket2prompt.output; -import java.util.List; - public interface PhenopacketTextGenerator { @@ -9,27 +7,4 @@ public interface PhenopacketTextGenerator { String QUERY_HEADER(); - - /** - * @param items a list of HPO labels, e.g., X and Y and Z - * @return A string formatted as X, Y, and Z. - */ - default String getOxfordCommaList(List items, String andWord) { - if (items.size() == 2) { - // no comma if we just have two items. 
- // one item will work with the below code - String andWithSpace = String.format(" %s ", andWord); - return String.join(andWithSpace, items) + "."; - } - StringBuilder sb = new StringBuilder(); - String symList = String.join(", ", items); - int jj = symList.lastIndexOf(", "); - if (jj > 0) { - String andWithSpaceAndComma = String.format(", %s ", andWord); - symList = symList.substring(0, jj) + andWithSpaceAndComma + symList.substring(jj+2); - } - sb.append(symList).append("."); - return sb.toString(); - } - } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java index da7ed93..5dd3851 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java @@ -3,19 +3,31 @@ import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; import java.util.List; +import java.util.Set; +import java.util.function.Predicate; public interface PpktPhenotypicFeatureGenerator { // Please let's use the two functions below instead! 
- //@Deprecated(forRemoval = true) - String formatFeatures( List ontologyTerms); + String formatFeatures(List ontologyTerms); - default String formatObservedFeatures(List oterms) { - return ""; // TODO implement in each implementing class, using defualt for now so as not to break code + + default List getObservedFeaturesAsStr(List oterms) { + return oterms.stream() + .filter(Predicate.not(OntologyTerm::isExcluded)) + .map(OntologyTerm::getLabel) + .toList(); + } + + default List getExcludedFeaturesAsStr(List oterms) { + return oterms.stream() + .filter(OntologyTerm::isExcluded) + .map(OntologyTerm::getLabel) + .toList(); } - default String formatExcludedFeatures(List oterms) { - return ""; // TODO implement in each implementing class, using defualt for now so as not to break code + default Set getMissingTranslations() { + return Set.of(); } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java index 85ee97c..e7ceb8c 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java @@ -14,6 +14,7 @@ import java.util.List; import java.util.Map; +import java.util.Set; public interface PromptGenerator { @@ -27,8 +28,8 @@ public interface PromptGenerator { String getVignetteAtAge(PhenopacketAge page, PhenopacketSex psex, List terms); - public static PromptGenerator english(Ontology ontology){ - return new EnglishPromptGenerator(ontology); + static PromptGenerator english(){ + return new EnglishPromptGenerator(); } static PromptGenerator spanish(Ontology hpo, HpInternational international) { @@ -74,7 +75,9 @@ default String createPrompt(PpktIndividual individual) { } - + default Set getMissingTranslations() { + return Set.of(); + } diff --git 
a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java index af00045..a7ad7cc 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java @@ -11,6 +11,7 @@ import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator; import java.util.List; +import java.util.Set; public class DutchPromptGenerator implements PromptGenerator { @@ -54,4 +55,9 @@ public String getVignetteAtAge(PhenopacketAge page, PhenopacketSex psex, List getMissingTranslations() { + return this.ppktPhenotypicFeatureGenerator.getMissingTranslations(); + } } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktPhenotypicfeatureDutch.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktPhenotypicfeatureDutch.java index 3fe0385..5e635fd 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktPhenotypicfeatureDutch.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktPhenotypicfeatureDutch.java @@ -4,18 +4,17 @@ import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; -import java.util.Set; +import java.util.*; import java.util.function.Predicate; public class PpktPhenotypicfeatureDutch implements PpktPhenotypicFeatureGenerator { private final HpInternational dutch; + private Set missingTranslations; public PpktPhenotypicfeatureDutch(HpInternational international) { dutch = international; + missingTranslations = new HashSet<>(); } @@ -27,7 +26,8 @@ private List getTranslations(List 
ontologyTerms) { if (opt.isPresent()) { labels.add(opt.get()); } else { - System.err.printf("[ERROR] Could not find %s translation for %s (%s).\n", dutch.getLanguageAcronym(), term.getLabel(), term.getTid().getValue()); + String missing = String.format(" %s (%s)", term.getLabel(), term.getTid().getValue()); + missingTranslations.add(missing); } } return labels; @@ -75,7 +75,7 @@ public String formatFeatures(List ontologyTerms) { if (excludedLabels.size() > 1) { return String.format("dus %s zijn uitgesloten.", getOxfordCommaList(excludedLabels)); } else { - return String.format("Dus %s werd uitgesloten.",excludedLabels.get(0)); + return String.format("Dus %s werd uitgesloten.",excludedLabels.getFirst()); } } else { String exclusion; @@ -87,4 +87,7 @@ public String formatFeatures(List ontologyTerms) { return getOxfordCommaList(observedLabels) + exclusion; } } + public Set getMissingTranslations() { + return missingTranslations; + } } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGenerator.java index 3218182..765be68 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGenerator.java @@ -1,6 +1,5 @@ package org.monarchinitiative.phenopacket2prompt.output.impl.english; -import org.monarchinitiative.phenol.ontology.data.Ontology; import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; @@ -11,7 +10,6 @@ public class EnglishPromptGenerator implements PromptGenerator { - private final Ontology hpo; private final PPKtIndividualInfoGenerator ppktAgeGenerator; @@ -20,11 +18,10 @@ public class EnglishPromptGenerator 
implements PromptGenerator { private final PpktPhenotypicFeatureGenerator ppktPhenotypicFeatureGenerator; - public EnglishPromptGenerator(Ontology hpo){ - this.hpo = hpo; + public EnglishPromptGenerator(){ ppktAgeGenerator = new PpktIndividualEnglish(); ppktTextGenerator = new PpktTextEnglish(); - this.ppktPhenotypicFeatureGenerator = new PpktPhenotypicfeatureEnglish(); + this.ppktPhenotypicFeatureGenerator = new PpktPhenotypicFeatureEnglish(); } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglish.java new file mode 100644 index 0000000..05083af --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglish.java @@ -0,0 +1,50 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.english; + +import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; +import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; + +import java.util.List; + +public class PpktPhenotypicFeatureEnglish implements PpktPhenotypicFeatureGenerator { + + + private String getOxfordCommaList(List items) { + if (items.size() == 1) { + return items.getFirst(); + } + if (items.size() == 2) { + // no comma if we just have two items. + // one item will work with the below code + return String.join(" and ", items); + } + String symList = String.join(", ", items); + int jj = symList.lastIndexOf(", "); + if (jj > 0) { + symList = symList.substring(0, jj) + ", and " + symList.substring(jj+2); + } + return symList; + } + + /** + * format features + * The proband was a 39-year old woman who presented at the age of 12 years with HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. + * The patient presented with [list of symptoms]. However, [excluded symptoms] were not observed." 
+ */ + @Override + public String formatFeatures(List ontologyTerms) { + List observed = getObservedFeaturesAsStr(ontologyTerms); + List excluded = getExcludedFeaturesAsStr(ontologyTerms); + if (observed.isEmpty() && excluded.isEmpty()) { + return "no phenotypic abnormalities."; // should never happen, actually! + } else if (excluded.isEmpty()) { + return getOxfordCommaList(observed) + ". "; + } else if (observed.isEmpty()) { + return "the following manifestations that were excluded: " + getOxfordCommaList(excluded) + ". "; + } else { + String exclusion = String.format("However, %s %s excluded.", getOxfordCommaList(excluded), excluded.size() > 1 ? " were" : "was"); + return getOxfordCommaList(observed) + ". " + exclusion; + } + } + + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicfeatureEnglish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicfeatureEnglish.java deleted file mode 100644 index 908c840..0000000 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicfeatureEnglish.java +++ /dev/null @@ -1,67 +0,0 @@ -package org.monarchinitiative.phenopacket2prompt.output.impl.english; - -import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; -import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; - -import java.util.List; -import java.util.function.Predicate; - -public class PpktPhenotypicfeatureEnglish implements PpktPhenotypicFeatureGenerator { - - - private String getOxfordCommaList(List items) { - if (items.size() == 1) { - return items.get(0); - } - if (items.size() == 2) { - // no comma if we just have two items. 
- // one item will work with the below code - return String.join(" and ", items); - } - String symList = String.join(", ", items); - int jj = symList.lastIndexOf(", "); - if (jj > 0) { - symList = symList.substring(0, jj) + ", and " + symList.substring(jj+2); - } - return symList; - } - - /** - * format features - * The proband was a 39-year old woman who presented at the age of 12 years with HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. - */ - @Override - public String formatFeatures(List ontologyTerms) { - List observed = ontologyTerms.stream() - .filter(Predicate.not(OntologyTerm::isExcluded)) - .map(OntologyTerm::getLabel).toList(); - List excluded = ontologyTerms.stream() - .filter(OntologyTerm::isExcluded) - .map(OntologyTerm::getLabel).toList(); - if (observed.isEmpty() && excluded.isEmpty()) { - return "no phenotypic abnormalities"; // should never happen, actually! - } else if (excluded.isEmpty()) { - return getOxfordCommaList(observed) + ". "; - } else if (observed.isEmpty()) { - return "exclusion of " + getOxfordCommaList(excluded) + "."; - } else { - String exclusion = String.format("%s %s excluded.", getOxfordCommaList(excluded), excluded.size() > 1 ? 
" were" : "was"); - return getOxfordCommaList(observed) + ", whereby " + exclusion; - } - } - - - @Override - public String formatObservedFeatures(List oterms) { - List observed = oterms.stream() - .filter(Predicate.not(OntologyTerm::isExcluded)) - .map(OntologyTerm::getLabel).toList(); - return getOxfordCommaList(observed); - - } - - @Override - public String formatExcludedFeatures(List oterms) { - return PpktPhenotypicFeatureGenerator.super.formatExcludedFeatures(oterms); - } -} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java index 741939d..d7b3dd6 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java @@ -11,6 +11,7 @@ import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator; import java.util.List; +import java.util.Set; public class GermanPromptGenerator implements PromptGenerator { @@ -55,7 +56,10 @@ public String getVignetteAtAge(PhenopacketAge page, PhenopacketSex psex, List getMissingTranslations() { + return this.ppktPhenotypicFeatureGenerator.getMissingTranslations(); + } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktPhenotypicfeatureGerman.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktPhenotypicfeatureGerman.java index 9923d2c..7a58d68 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktPhenotypicfeatureGerman.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktPhenotypicfeatureGerman.java @@ -4,29 +4,30 @@ import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; import 
org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; +import java.util.*; import java.util.function.Predicate; public class PpktPhenotypicfeatureGerman implements PpktPhenotypicFeatureGenerator { - private final HpInternational spanish; + private final HpInternational german; + private Set missingTranslations; + public PpktPhenotypicfeatureGerman(HpInternational international) { - spanish = international; + german = international; + missingTranslations = new HashSet<>(); } - private List getTranslations(List ontologyTerms) { List labels = new ArrayList<>(); for (var term: ontologyTerms) { - Optional opt = spanish.getLabel(term.getTid()); + Optional opt = german.getLabel(term.getTid()); if (opt.isPresent()) { labels.add(opt.get()); } else { - System.err.printf("[ERROR] Could not find %s translation for %s (%s).\n", spanish.getLanguageAcronym(), term.getLabel(), term.getTid().getValue()); + String missing = String.format(" %s (%s)", term.getLabel(), term.getTid().getValue()); + missingTranslations.add(missing); } } return labels; @@ -36,7 +37,7 @@ private List getTranslations(List ontologyTerms) { private String getOxfordCommaList(List items) { if (items.size() == 1) { - return items.get(0); + return items.getFirst(); } if (items.size() == 2) { // no comma if we just have two items. 
@@ -79,4 +80,8 @@ public String formatFeatures(List ontologyTerms) { return getOxfordCommaList(observedLabels) + exclusion; } } + + public Set getMissingTranslations() { + return missingTranslations; + } } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java index 435d772..6e59f52 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java @@ -5,10 +5,7 @@ import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; -import java.util.Set; +import java.util.*; import java.util.function.Predicate; public class PpktPhenotypicfeatureSpanish implements PpktPhenotypicFeatureGenerator { @@ -17,9 +14,10 @@ public class PpktPhenotypicfeatureSpanish implements PpktPhenotypicFeatureGenera public PpktPhenotypicfeatureSpanish(HpInternational international) { spanish = international; + missingTranslations = new HashSet<>(); } - + private Set missingTranslations; private List getTranslations(List ontologyTerms) { List labels = new ArrayList<>(); @@ -28,7 +26,8 @@ private List getTranslations(List ontologyTerms) { if (opt.isPresent()) { labels.add(opt.get()); } else { - System.err.printf("[ERROR] Could not find %s translation for %s (%s).\n", spanish.getLanguageAcronym(), term.getLabel(), term.getTid().getValue()); + String missing = String.format(" %s (%s)", term.getLabel(), term.getTid().getValue()); + missingTranslations.add(missing); } } return labels; @@ -39,7 +38,7 @@ private List getTranslations(List ontologyTerms) { private String 
getOxfordCommaList(List items) { if (items.size() == 1) { - return items.get(0); + return items.getFirst(); } if (items.size() == 2) { // no comma if we just have two items. @@ -76,7 +75,7 @@ public String formatFeatures(List ontologyTerms) { if (excludedLabels.size() > 1) { return String.format("por lo que se excluyeron %s.", getOxfordCommaList(excludedLabels)); } else { - return String.format("por lo que %s fue excluido.",excludedLabels.get(0)); + return String.format("por lo que %s fue excluido.",excludedLabels.getFirst()); } } else { String exclusion; @@ -88,4 +87,9 @@ public String formatFeatures(List ontologyTerms) { return getOxfordCommaList(observedLabels) + exclusion; } } + + public Set getMissingTranslations() { + return missingTranslations; + } + } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java index 1e626f8..d5bfa13 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java @@ -53,7 +53,10 @@ public String getVignetteAtAge(PhenopacketAge page, PhenopacketSex psex, List getMissingTranslations() { + return this.ppktPhenotypicFeatureGenerator.getMissingTranslations(); + } diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java index b342e01..c0e17c8 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java @@ -6,25 +6,32 @@ import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; 
import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; -import org.phenopackets.phenopackettools.builder.builders.DiseaseBuilder; -import org.phenopackets.phenopackettools.builder.builders.IndividualBuilder; -import org.phenopackets.phenopackettools.builder.builders.MetaDataBuilder; -import org.phenopackets.phenopackettools.builder.builders.TimeElements; +import org.phenopackets.phenopackettools.builder.builders.*; import org.phenopackets.schema.v2.core.Disease; import org.phenopackets.schema.v2.core.Individual; import org.phenopackets.schema.v2.core.MetaData; +import org.phenopackets.schema.v2.core.PhenotypicFeature; import java.util.function.Supplier; public class PPKtIndividualBase { private final static MetaData metadata = MetaDataBuilder.builder("curator").build(); + + private final static PhenotypicFeature atrophy = PhenotypicFeatureBuilder.builder("HP:0001272", "Cerebellar atrophy" ).infantileOnset().build(); + private final static PhenotypicFeature ataxia = PhenotypicFeatureBuilder.builder("HP:0001251", "Ataxia").infantileOnset().build(); + private final static PhenotypicFeature bradyphrenExcluded = PhenotypicFeatureBuilder.builder("HP:0031843", "Bradyphrenia").excluded().build(); + private final static PhenotypicFeature polydactyly = PhenotypicFeatureBuilder.builder("HP:0100259", "Postaxial polydactyly").congenitalOnset().build(); + + + + public sealed interface TestOutcome { record Ok(String value) implements TestOutcome {} record Error(Supplier exceptionSupplier) implements TestOutcome {} } - public record TestIdvlDescription(String description, PpktIndividual ppktIndividual, TestOutcome expectedOutcome) {} + public record TestIndividual(String description, PpktIndividual ppktIndividual, TestOutcome expectedOutcome) {} public record TestIdvlHeShe(String description, PhenopacketSex ppktSex, TestOutcome expectedOutcome) {} @@ -38,7 +45,7 @@ public static PpktIndividual female46yearsInfantileOnset() { PhenopacketBuilder builder = 
PhenopacketBuilder.create("id1", metadata); Disease d = DiseaseBuilder.builder("OMIM:100123", "test").onset(TimeElements.infantileOnset()).build(); Individual subject = IndividualBuilder.builder("individual.1").female().ageAtLastEncounter("P46Y").build(); - builder.individual(subject).addDisease(d); + builder.individual(subject).addDisease(d).addPhenotypicFeature(atrophy).addPhenotypicFeature(ataxia).addPhenotypicFeature(bradyphrenExcluded); return new PpktIndividual(builder.build()); } @@ -46,7 +53,7 @@ public static PpktIndividual male4monthsCongenitalOnset() { PhenopacketBuilder builder = PhenopacketBuilder.create("id2", metadata); Disease d = DiseaseBuilder.builder("OMIM:100123", "test").onset(TimeElements.congenitalOnset()).build(); Individual subject = IndividualBuilder.builder("individual.2").male().ageAtLastEncounter("P4M").build(); - builder.individual(subject).addDisease(d); + builder.individual(subject).addDisease(d).addPhenotypicFeature(polydactyly); return new PpktIndividual(builder.build()); } diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGeneratorTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGeneratorTest.java index 4cb5cfc..c28473a 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGeneratorTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGeneratorTest.java @@ -1,4 +1,10 @@ package org.monarchinitiative.phenopacket2prompt.output.impl.english; public class EnglishPromptGeneratorTest { + + + + + + } diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java index 38a8b5c..ca1312b 100644 --- 
a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java @@ -17,11 +17,11 @@ public class PpktIndividualEnglishTest extends PPKtIndividualBase{ - private static Stream testGetIndividualDescription() { + private static Stream testGetIndividualDescription() { return Stream.of( - new TestIdvlDescription("46 year olf female, infantile onset", + new TestIndividual("46 year olf female, infantile onset", female46yearsInfantileOnset(), new TestOutcome.Ok("The proband was a 46-year old woman who presented as an infant with")), - new TestIdvlDescription("male 4 months, congenital onset", + new TestIndividual("male 4 months, congenital onset", male4monthsCongenitalOnset(), new TestOutcome.Ok("The proband was a 4-month old male infant who presented at birth with")) ); } @@ -30,7 +30,7 @@ private static Stream testGetIndividualDescription() { @ParameterizedTest @MethodSource("testGetIndividualDescription") - void testEvaluateExpression(TestIdvlDescription testCase) { + void testEvaluateExpression(TestIndividual testCase) { PPKtIndividualInfoGenerator generator = new PpktIndividualEnglish(); PpktIndividual ppkti = testCase.ppktIndividual(); switch (testCase.expectedOutcome()) { diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglishTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglishTest.java new file mode 100644 index 0000000..11b49d8 --- /dev/null +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglishTest.java @@ -0,0 +1,49 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.english; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; +import 
org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; + +import java.util.function.Supplier; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase.female46yearsInfantileOnset; +import static org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase.male4monthsCongenitalOnset; + +public class PpktPhenotypicFeatureEnglishTest { + + + + + private static Stream testGetIndividualPhenotypicFeatures() { + return Stream.of( + new PPKtIndividualBase.TestIndividual("46 year olf female, infantile onset", + female46yearsInfantileOnset(), new PPKtIndividualBase.TestOutcome.Ok("Cerebellar atrophy and Ataxia")), + new PPKtIndividualBase.TestIndividual("male 4 months, congenital onset", + male4monthsCongenitalOnset(), new PPKtIndividualBase.TestOutcome.Ok("Postaxial polydactyly")) + ); + } + +// builder.individual(subject).addDisease(d).addPhenotypicFeature(atrophy).addPhenotypicFeature(ataxia).addPhenotypicFeature(bradyphrenExcluded); + @ParameterizedTest + @MethodSource("testGetIndividualPhenotypicFeatures") + void testEvaluateExpression(PPKtIndividualBase.TestIndividual testCase) { + PPKtIndividualInfoGenerator generator = new PpktIndividualEnglish(); + EnglishPromptGenerator gen = new EnglishPromptGenerator(); + PpktIndividual ppkti = testCase.ppktIndividual(); + switch (testCase.expectedOutcome()) { + case PPKtIndividualBase.TestOutcome.Ok(String expectedResult) -> + assertEquals(expectedResult, gen.formatFeatures(ppkti.getPhenotypicFeaturesAtOnset()), + "Incorrect evaluation for: " + testCase.description()); + case PPKtIndividualBase.TestOutcome.Error(Supplier exceptionSupplier) -> + 
assertThrows(exceptionSupplier.get().getClass(), + () -> generator.getIndividualDescription(ppkti), + "Incorrect error handling for: " + testCase.description()); + } + } + +} From 4fb664983154aa86f0369f51649cce5086d36827 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Thu, 30 May 2024 15:56:44 +0200 Subject: [PATCH 23/39] refactoring testing --- .../output/impl/english/PpktPhenotypicFeatureEnglish.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglish.java index 05083af..683afbd 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglish.java @@ -41,7 +41,7 @@ public String formatFeatures(List ontologyTerms) { } else if (observed.isEmpty()) { return "the following manifestations that were excluded: " + getOxfordCommaList(excluded) + ". "; } else { - String exclusion = String.format("However, %s %s excluded.", getOxfordCommaList(excluded), excluded.size() > 1 ? " were" : "was"); + String exclusion = String.format("However, %s %s excluded.", getOxfordCommaList(excluded), excluded.size() > 1 ? " were" : "was"); return getOxfordCommaList(observed) + ". 
" + exclusion; } } From 0c533b9288663610a7a2b92cad53089e9e003d68 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Thu, 30 May 2024 16:50:47 +0200 Subject: [PATCH 24/39] spanish testing --- .../impl/italian/ItalianPromptGenerator.java | 4 +- .../impl/italian/PpktIndividualItalian.java | 4 +- .../impl/spanish/PpktIndividualSpanish.java | 34 +++-- .../spanish/PpktIndividualSpanishTest.java | 116 ++++++++++++++++++ 4 files changed, 141 insertions(+), 17 deletions(-) create mode 100644 src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGenerator.java index 8dae68c..3ceca76 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGenerator.java @@ -5,7 +5,7 @@ import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; -import org.monarchinitiative.phenopacket2prompt.output.PhenopacketIndividualInformationGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; import org.monarchinitiative.phenopacket2prompt.output.PhenopacketTextGenerator; import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator; @@ -17,7 +17,7 @@ public class ItalianPromptGenerator implements PromptGenerator { private final Ontology hpo; - private final PhenopacketIndividualInformationGenerator ppktAgeSexGenerator; + private final PPKtIndividualInfoGenerator ppktAgeSexGenerator; private final 
PhenopacketTextGenerator ppktTextGenerator; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalian.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalian.java index 8e90f13..c06950a 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalian.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalian.java @@ -2,13 +2,13 @@ import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenopacket2prompt.model.*; -import org.monarchinitiative.phenopacket2prompt.output.PhenopacketIndividualInformationGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; import java.util.ArrayList; import java.util.List; import java.util.Optional; -public class PpktIndividualItalian implements PhenopacketIndividualInformationGenerator { +public class PpktIndividualItalian implements PPKtIndividualInfoGenerator { private static final String FEMALE_FETUS = "un feto femmina"; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java index 27b4515..c28b978 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java @@ -23,9 +23,9 @@ public class PpktIndividualSpanish implements PPKtIndividualInfoGenerator { private static final String MALE_INFANT = "un bebé masculino"; private static final String INFANT = "un bebé"; - private static final String FEMALE_CHILD = "una bambina"; - private static final String MALE_CHILD = "un bambino"; - private static final String CHILD = "un bambino"; + private static 
final String FEMALE_CHILD = "una niña"; + private static final String MALE_CHILD = "un niño"; + private static final String CHILD = "un niño"; private static final String FEMALE_ADOLESCENT = "un'adolescente femmina"; private static final String MALE_ADOLESCENT = "un adolescente maschio"; @@ -219,13 +219,13 @@ private String iso8601AtAgeOf(Iso8601Age isoAge) { } } - private String onsetTermAtAgeOf(HpoOnsetAge hpoOnsetTermAge) { + private String onsetTermAtAgeOf(HpoOnsetAge hpoOnsetTermAge, PhenopacketSex psex) { if (hpoOnsetTermAge.isFetus()) { return "en el período fetal"; } else if (hpoOnsetTermAge.isCongenital()) { - return "en el período neonatal"; + return "al nacer"; } else if (hpoOnsetTermAge.isInfant()) { - return "como un bebe"; + return "en el primer año de vida"; } else if (hpoOnsetTermAge.isChild()) { return "en la niñez"; } else if (hpoOnsetTermAge.isJuvenile()) { @@ -261,9 +261,10 @@ private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8 }; } else if (m>0 || d> 0) { return switch (psex) { - case FEMALE -> String.format("una infante %s", iso8601ToMonthDay(iso8601Age)); - case MALE -> String.format("un infante %s", iso8601ToMonthDay(iso8601Age)); - default -> String.format("un infante %s", iso8601ToMonthDay(iso8601Age)); + // note that in Spanishm infante is up to 5 years + case FEMALE -> String.format("una bebé %s", iso8601ToMonthDay(iso8601Age)); + case MALE -> String.format("un bebé %s", iso8601ToMonthDay(iso8601Age)); + default -> String.format("un bebé %s", iso8601ToMonthDay(iso8601Age)); }; } else { return switch (psex) { @@ -317,7 +318,7 @@ private String hpoOnsetIndividualDescription(PhenopacketSex psex, HpoOnsetAge hp /** * A sentence such as The proband was a 39-year old woman who presented at the age of 12 years with * HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. 
This method returns the phrase that ends with "with" - * El sujeto era un niño de 1 año y 10 meses que se presentó como recién nacido con un filtrum largo. + * El sujeto era un niño de 1 año y 10 meses que se presentaba como recién nacido con un filtrum largo. * @param psex * @param lastExamAge * @param onsetAge @@ -341,15 +342,22 @@ private String onsetAndLastEncounterAvailable(PhenopacketSex psex, PhenopacketAg onsetDescription = iso8601AtAgeOf(isoAge); } else if (onsetAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) onsetAge; - onsetDescription = onsetTermAtAgeOf(hpoOnsetTermAge); + onsetDescription = onsetTermAtAgeOf(hpoOnsetTermAge, psex); } else { // should never happen throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); } - return String.format("El sujeto era %s que se presentó %s con", individualDescription, onsetDescription); + return switch (psex){ + case FEMALE -> String.format("La paciente era %s que se presentaba %s con", individualDescription, onsetDescription); + case MALE -> String.format("El paciente era %s que se presentaba %s con", individualDescription, onsetDescription); + default -> String.format("El paciente era %s que se presentaba %s con", individualDescription, onsetDescription); + }; } + + + /** * Age at last examination available but age of onset not available * The proband was a 39-year old woman who presented with HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. 
@@ -385,7 +393,7 @@ private String onsetAvailable(PhenopacketSex psex, PhenopacketAge onsetAge) { onsetDescription = iso8601AtAgeOf(isoAge); } else if (onsetAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) onsetAge; - onsetDescription = onsetTermAtAgeOf(hpoOnsetTermAge); + onsetDescription = onsetTermAtAgeOf(hpoOnsetTermAge, psex); } else { // should never happen throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java new file mode 100644 index 0000000..c8645e7 --- /dev/null +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java @@ -0,0 +1,116 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.spanish; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; + +import java.util.function.Supplier; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +public class PpktIndividualSpanishTest extends PPKtIndividualBase{ + + + + private static Stream testGetIndividualDescription() { + return Stream.of( + new TestIndividual("46 year olf female, infantile onset", + female46yearsInfantileOnset(), new TestOutcome.Ok("La paciente era mujer de 46 años que se presentaba en el primer año de vida con")), + new TestIndividual("male 4 months, 
congenital onset", + male4monthsCongenitalOnset(), new TestOutcome.Ok("El paciente era un bebé de 4 meses que se presentaba al nacer con")) + ); + } + + + + @ParameterizedTest + @MethodSource("testGetIndividualDescription") + void testEvaluateExpression(TestIndividual testCase) { + PPKtIndividualInfoGenerator generator = new PpktIndividualSpanish(); + PpktIndividual ppkti = testCase.ppktIndividual(); + switch (testCase.expectedOutcome()) { + case TestOutcome.Ok(String expectedResult) -> + assertEquals(expectedResult, generator.getIndividualDescription(ppkti), + "Incorrect evaluation for: " + testCase.description()); + case TestOutcome.Error(Supplier exceptionSupplier) -> + assertThrows(exceptionSupplier.get().getClass(), + () -> generator.getIndividualDescription(ppkti), + "Incorrect error handling for: " + testCase.description()); + } + } + + + + private static Stream testGetPPKtSex() { + return Stream.of( + new TestIdvlHeShe("female", + PhenopacketSex.FEMALE, new TestOutcome.Ok("she")), + new TestIdvlHeShe("male", + PhenopacketSex.MALE, new TestOutcome.Ok("he")), + new TestIdvlHeShe("proband", + PhenopacketSex.UNKNOWN, new TestOutcome.Ok("the individual")) + ); + } + + @ParameterizedTest + @MethodSource("testGetPPKtSex") + void testPPKtSex(TestIdvlHeShe testCase) { + PPKtIndividualInfoGenerator generator = new PpktIndividualSpanish(); + switch (testCase.expectedOutcome()) { + case TestOutcome.Ok(String expectedResult) -> + assertEquals(expectedResult, generator.heSheIndividual(testCase.ppktSex())); + case TestOutcome.Error(Supplier exceptionSupplier) -> + assertThrows(exceptionSupplier.get().getClass(), + () -> generator.heSheIndividual(testCase.ppktSex()), + "Incorrect error handling for: " + testCase.description()); + } + } + + + +//public record TestIdvlAtAge(String description, PhenopacketAge ppktAge, TestOutcome expectedOutcome) {} + + + + + private static Stream testIndlAtAge() { + return Stream.of( + new TestIdvlAtAge("congenital", + congenital, new 
TestOutcome.Ok("At birth")), + new TestIdvlAtAge("infantile", + infantile, new TestOutcome.Ok("During the infantile period")), + new TestIdvlAtAge("childhood age", + childhood, new TestOutcome.Ok("During childhood")), + new TestIdvlAtAge("46 years old", + p46y, new TestOutcome.Ok("At an age of 46 years")) + ); + } + + + @ParameterizedTest + @MethodSource("testIndlAtAge") + void testPPKtSex(TestIdvlAtAge testCase) { + PPKtIndividualInfoGenerator generator = new PpktIndividualSpanish(); + switch (testCase.expectedOutcome()) { + case TestOutcome.Ok(String expectedResult) -> + assertEquals(expectedResult, generator.atAge(testCase.ppktAge())); + case TestOutcome.Error(Supplier exceptionSupplier) -> + assertThrows(exceptionSupplier.get().getClass(), + () -> generator.atAge(testCase.ppktAge()), + "Incorrect error handling for: " + testCase.description()); + } + + + } + + + + + + +} From cf0df021619c432df719b56eb305d16219de171f Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Fri, 31 May 2024 14:06:43 +0200 Subject: [PATCH 25/39] spanish testing --- .../cmd/GbtTranslateBatchCommand.java | 4 +- .../phenopacket2prompt/model/Iso8601Age.java | 4 +- .../model/PpktIndividual.java | 4 + .../PpktPhenotypicFeatureGenerator.java | 1 - .../impl/english/PpktIndividualEnglish.java | 3 + .../impl/german/PpktIndividualGerman.java | 4 +- .../impl/spanish/PpktIndividualSpanish.java | 13 +- .../output/PPKtIndividualBase.java | 32 ++++- .../english/PpktIndividualEnglishTest.java | 9 +- .../PpktPhenotypicFeatureEnglishTest.java | 1 - .../impl/german/PpktIndividualGermanTest.java | 120 ++++++++++++++++++ .../spanish/PpktIndividualSpanishTest.java | 13 +- 12 files changed, 187 insertions(+), 21 deletions(-) create mode 100644 src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGermanTest.java diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java 
b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java index eb3e9b4..32dc8d0 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java @@ -127,8 +127,8 @@ private void outputPromptsInternational(List ppktFiles, Ontology hpo, Stri PpktIndividual individual = PpktIndividual.fromFile(f); List diseaseList = individual.getDiseases(); if (diseaseList.size() != 1) { - System.err.printf("[ERROR] Got %d diseases for %s.\n", diseaseList.size(), individual.getPhenopacketId()); - continue; + String errmsg = String.format("[ERROR] Got %d diseases for %s.\n", diseaseList.size(), individual.getPhenopacketId()); + throw new PhenolRuntimeException(errmsg); } PhenopacketDisease pdisease = diseaseList.get(0); String promptFileName = getFileName( individual.getPhenopacketId(), languageCode); diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java index 96f20fb..990174c 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java @@ -78,12 +78,12 @@ public PhenopacketAgeType ageType() { @Override public boolean isJuvenile() { - return years >= 10 && years < 18; + return years >= 6 && years < 18; } @Override public boolean isChild() { - return years >= 1 && years < 10; + return years >= 1 && years < 6; } @Override diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividual.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividual.java index ab5dc0b..7abd403 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividual.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividual.java @@ 
-213,4 +213,8 @@ public Map> getSpecifiedAgePhenotypicFeatures } return ageToFeatureMap; } + + public int annotationCount() { + return ppkt.getPhenotypicFeaturesCount(); + } } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java index 5dd3851..d4016c4 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java @@ -8,7 +8,6 @@ public interface PpktPhenotypicFeatureGenerator { - // Please let's use the two functions below instead! String formatFeatures(List ontologyTerms); diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglish.java index d8cd917..90ec3cf 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglish.java @@ -16,6 +16,9 @@ public PpktIndividualEnglish() { public String getIndividualDescription(PpktIndividual individual) { + if (individual.annotationCount() == 0) { + throw new PhenolRuntimeException("No HPO annotations"); + } Optional lastExamOpt = individual.getAgeAtLastExamination(); Optional onsetOpt = individual.getAgeAtOnset(); PhenopacketSex psex = individual.getSex(); diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java index 7536850..557f813 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java +++ 
b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java @@ -396,8 +396,8 @@ private String ageNotAvailable(PhenopacketSex psex) { @Override public String heSheIndividual(PhenopacketSex psex) { return switch (psex) { - case FEMALE -> "er"; - case MALE -> "sie"; + case FEMALE -> "sie"; + case MALE -> "er"; default -> "die Person"; }; } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java index c28b978..0f91cfe 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java @@ -148,6 +148,9 @@ private String atIsoAgeExact(PhenopacketAge ppktAge) { @Override public String getIndividualDescription(PpktIndividual individual) { + if (individual.annotationCount() == 0) { + throw new PhenolRuntimeException("No HPO annotations"); + } Optional lastExamOpt = individual.getAgeAtLastExamination(); Optional onsetOpt = individual.getAgeAtOnset(); PhenopacketSex psex = individual.getSex(); @@ -376,7 +379,7 @@ private String lastEncounterAvailable(PhenopacketSex psex, PhenopacketAge lastEx // should never happen throw new PhenolRuntimeException("Did not recognize last exam age type " + lastExamAge.ageType()); } - return String.format("El paciente era %s quien se presentó con", individualDescription); + return String.format("El paciente era %s quien se presentaba con", individualDescription); } /** @@ -398,14 +401,14 @@ private String onsetAvailable(PhenopacketSex psex, PhenopacketAge onsetAge) { // should never happen throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); } - return String.format("El paciente se presentó %s con", onsetDescription); + return String.format("El 
paciente se presentaba %s con", onsetDescription); } private String ageNotAvailable(PhenopacketSex psex) { return switch (psex) { - case FEMALE -> "La paciente se presentó con"; - case MALE -> "El paciente se presentó con"; - default -> "El paciente se presentó con"; + case FEMALE -> "La paciente se presentaba con"; + case MALE -> "El paciente se presentaba con"; + default -> "El paciente se presentaba con"; }; } diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java index c0e17c8..46bcd88 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java @@ -19,10 +19,10 @@ public class PPKtIndividualBase { private final static PhenotypicFeature atrophy = PhenotypicFeatureBuilder.builder("HP:0001272", "Cerebellar atrophy" ).infantileOnset().build(); - private final static PhenotypicFeature ataxia = PhenotypicFeatureBuilder.builder("HP:0001251", "Ataxia").infantileOnset().build(); + private final static PhenotypicFeature ataxia = PhenotypicFeatureBuilder.builder("HP:0001251", "Ataxia").infantileOnset().build(); private final static PhenotypicFeature bradyphrenExcluded = PhenotypicFeatureBuilder.builder("HP:0031843", "Bradyphrenia").excluded().build(); private final static PhenotypicFeature polydactyly = PhenotypicFeatureBuilder.builder("HP:0100259", "Postaxial polydactyly").congenitalOnset().build(); - + private final static PhenotypicFeature hepatomegalyNoOnset = PhenotypicFeatureBuilder.builder("HP:0002240","Hepatomegaly").build(); @@ -57,6 +57,34 @@ public static PpktIndividual male4monthsCongenitalOnset() { return new PpktIndividual(builder.build()); } + public static PpktIndividual femaleNoAge() { + PhenopacketBuilder builder = PhenopacketBuilder.create("id3", metadata); + Disease d = 
DiseaseBuilder.builder("OMIM:100123", "test").build(); + Individual subject = IndividualBuilder.builder("individual.3").female().build(); + builder.individual(subject).addDisease(d).addPhenotypicFeature(hepatomegalyNoOnset); + return new PpktIndividual(builder.build()); + } + + /** + * Invalid phenopacket because no HPO annotationsa + * @return + */ + public static PpktIndividual femaleNoHPOs() { + PhenopacketBuilder builder = PhenopacketBuilder.create("id4", metadata); + Disease d = DiseaseBuilder.builder("OMIM:100123", "test").build(); + Individual subject = IndividualBuilder.builder("individual.4").female().build(); + return new PpktIndividual(builder.build()); + } + + public static PpktIndividual unknownSex4MonthOnset() { + PhenopacketBuilder builder = PhenopacketBuilder.create("id5", metadata); + Disease d = DiseaseBuilder.builder("OMIM:100123", "test").onset(TimeElements.childhoodOnset()).build(); + Individual subject = IndividualBuilder.builder("individual.5").unknownSex().build(); + builder.individual(subject).addDisease(d).addPhenotypicFeature(hepatomegalyNoOnset); + return new PpktIndividual(builder.build()); + } + + diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java index ca1312b..9992eb1 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java @@ -2,6 +2,7 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; +import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; import 
org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; @@ -22,7 +23,13 @@ private static Stream testGetIndividualDescription() { new TestIndividual("46 year olf female, infantile onset", female46yearsInfantileOnset(), new TestOutcome.Ok("The proband was a 46-year old woman who presented as an infant with")), new TestIndividual("male 4 months, congenital onset", - male4monthsCongenitalOnset(), new TestOutcome.Ok("The proband was a 4-month old male infant who presented at birth with")) + male4monthsCongenitalOnset(), new TestOutcome.Ok("The proband was a 4-month old male infant who presented at birth with")), + new TestIndividual("female, no onset", + femaleNoAge(), new TestOutcome.Ok("The proband was a female who presented with")), + new TestIndividual("female, no HPOs", + femaleNoHPOs(), new TestOutcome.Error(() -> new PhenolRuntimeException("No HPO annotations"))), + new TestIndividual("unknown sex, no 4mo", + unknownSex4MonthOnset(), new TestOutcome.Ok("The proband presented in childhood with")) ); } diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglishTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglishTest.java index 11b49d8..73bf9c7 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglishTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglishTest.java @@ -28,7 +28,6 @@ private static Stream testGetIndividualPhenot ); } -// builder.individual(subject).addDisease(d).addPhenotypicFeature(atrophy).addPhenotypicFeature(ataxia).addPhenotypicFeature(bradyphrenExcluded); @ParameterizedTest @MethodSource("testGetIndividualPhenotypicFeatures") void testEvaluateExpression(PPKtIndividualBase.TestIndividual testCase) { diff --git 
a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGermanTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGermanTest.java new file mode 100644 index 0000000..3044918 --- /dev/null +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGermanTest.java @@ -0,0 +1,120 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.german; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; +import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; +import org.monarchinitiative.phenopacket2prompt.output.impl.spanish.PpktIndividualSpanish; + +import java.util.function.Supplier; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +public class PpktIndividualGermanTest extends PPKtIndividualBase{ + + + + private static Stream testGetIndividualDescription() { + return Stream.of( + new TestIndividual("46 year olf female, infantile onset", + female46yearsInfantileOnset(), new TestOutcome.Ok("Die Patientin war eine 46jährige Frau, die sich im Säuglingsalter mit den folgenden Symptomen vorgestellt hat:")), + new TestIndividual("male 4 months, congenital onset", + male4monthsCongenitalOnset(), new TestOutcome.Ok("Der Patient war ein 4 Monate alter Säugling, der sich zum Zeitpunt der Geburt mit den folgenden Symptomen vorgestellt hart paciente era un bebé de 4 meses que se presentaba al nacer con")), + new TestIndividual("female, no onset", + femaleNoAge(), new TestOutcome.Ok("La 
paciente se presentaba con")), + new TestIndividual("female, no HPOs", + femaleNoHPOs(), new TestOutcome.Error(() -> new PhenolRuntimeException("No HPO annotations"))), + new TestIndividual("unknown sex, no 4mo", + unknownSex4MonthOnset(), new TestOutcome.Ok("El paciente se presentaba en la niñez con")) + ); + } + + + + @ParameterizedTest + @MethodSource("testGetIndividualDescription") + void testEvaluateExpression(TestIndividual testCase) { + PPKtIndividualInfoGenerator generator = new PpktIndividualGerman(); + PpktIndividual ppkti = testCase.ppktIndividual(); + switch (testCase.expectedOutcome()) { + case TestOutcome.Ok(String expectedResult) -> + assertEquals(expectedResult, generator.getIndividualDescription(ppkti), + "Incorrect evaluation for: " + testCase.description()); + case TestOutcome.Error(Supplier exceptionSupplier) -> + assertThrows(exceptionSupplier.get().getClass(), + () -> generator.getIndividualDescription(ppkti), + "Incorrect error handling for: " + testCase.description()); + } + } + + + + private static Stream testGetPPKtSex() { + return Stream.of( + new TestIdvlHeShe("female", + PhenopacketSex.FEMALE, new TestOutcome.Ok("sie")), + new TestIdvlHeShe("male", + PhenopacketSex.MALE, new TestOutcome.Ok("er")), + new TestIdvlHeShe("proband", + PhenopacketSex.UNKNOWN, new TestOutcome.Ok("die Person")) + ); + } + + @ParameterizedTest + @MethodSource("testGetPPKtSex") + void testPPKtSex(TestIdvlHeShe testCase) { + PPKtIndividualInfoGenerator generator = new PpktIndividualGerman(); + switch (testCase.expectedOutcome()) { + case TestOutcome.Ok(String expectedResult) -> + assertEquals(expectedResult, generator.heSheIndividual(testCase.ppktSex())); + case TestOutcome.Error(Supplier exceptionSupplier) -> + assertThrows(exceptionSupplier.get().getClass(), + () -> generator.heSheIndividual(testCase.ppktSex()), + "Incorrect error handling for: " + testCase.description()); + } + } + + + + + private static Stream testIndlAtAge() { + return Stream.of( + new 
TestIdvlAtAge("congenital", + congenital, new TestOutcome.Ok("zum Zeitpunkt der Geburt")), + new TestIdvlAtAge("infantile", + infantile, new TestOutcome.Ok("als Säugling")), + new TestIdvlAtAge("childhood age", + childhood, new TestOutcome.Ok("in der Kindheit")), + new TestIdvlAtAge("46 years old", + p46y, new TestOutcome.Ok("im Alter von 46 Jahren")) + ); + } + + + @ParameterizedTest + @MethodSource("testIndlAtAge") + void testPPKtSex(TestIdvlAtAge testCase) { + PPKtIndividualInfoGenerator generator = new PpktIndividualGerman(); + switch (testCase.expectedOutcome()) { + case TestOutcome.Ok(String expectedResult) -> + assertEquals(expectedResult, generator.atAge(testCase.ppktAge())); + case TestOutcome.Error(Supplier exceptionSupplier) -> + assertThrows(exceptionSupplier.get().getClass(), + () -> generator.atAge(testCase.ppktAge()), + "Incorrect error handling for: " + testCase.description()); + } + + + } + + + + + + +} diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java index c8645e7..3fb3c15 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java @@ -2,6 +2,7 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; +import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase; @@ -22,7 +23,13 @@ private static Stream testGetIndividualDescription() { new TestIndividual("46 year olf female, infantile onset", female46yearsInfantileOnset(), new 
TestOutcome.Ok("La paciente era mujer de 46 años que se presentaba en el primer año de vida con")), new TestIndividual("male 4 months, congenital onset", - male4monthsCongenitalOnset(), new TestOutcome.Ok("El paciente era un bebé de 4 meses que se presentaba al nacer con")) + male4monthsCongenitalOnset(), new TestOutcome.Ok("El paciente era un bebé de 4 meses que se presentaba al nacer con")), + new TestIndividual("female, no onset", + femaleNoAge(), new TestOutcome.Ok("La paciente se presentaba con")), + new TestIndividual("female, no HPOs", + femaleNoHPOs(), new TestOutcome.Error(() -> new PhenolRuntimeException("No HPO annotations"))), + new TestIndividual("unknown sex, no 4mo", + unknownSex4MonthOnset(), new TestOutcome.Ok("El paciente se presentaba en la niñez con")) ); } @@ -73,10 +80,6 @@ void testPPKtSex(TestIdvlHeShe testCase) { -//public record TestIdvlAtAge(String description, PhenopacketAge ppktAge, TestOutcome expectedOutcome) {} - - - private static Stream testIndlAtAge() { return Stream.of( From 776119e0ccec8148a77c296aac8e69dc9193aba4 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Sat, 1 Jun 2024 15:35:50 +0200 Subject: [PATCH 26/39] fixing age descriptions --- .../cmd/GbtTranslateBatchCommand.java | 1 + .../PpktPhenotypicFeatureGenerator.java | 13 +++ .../impl/german/GermanPromptGenerator.java | 2 +- .../impl/german/PpktIndividualGerman.java | 104 +++++++++++------- .../german/PpktPhenotypicfeatureGerman.java | 25 ++--- .../output/PPKtIndividualBase.java | 18 ++- .../german/GermanPromptGeneratorTest.java | 46 ++++++++ .../impl/german/PpktIndividualGermanTest.java | 29 ++--- 8 files changed, 164 insertions(+), 74 deletions(-) create mode 100644 src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java index 
32dc8d0..61604fd 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java @@ -76,6 +76,7 @@ public Integer call() throws Exception { resetOutput("nl"); PromptGenerator dutch = PromptGenerator.dutch(hpo, internationalMap.get("nl")); outputPromptsInternational(ppktFiles, hpo, "nl", dutch); + // GERMAN resetOutput("de"); PromptGenerator german = PromptGenerator.german(hpo, internationalMap.get("de")); outputPromptsInternational(ppktFiles, hpo, "de", german); diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java index d4016c4..c8df74c 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java @@ -25,6 +25,19 @@ default List getExcludedFeaturesAsStr(List oterms) { .toList(); } + default List getObservedFeatures(List oterms) { + return oterms.stream() + .filter(Predicate.not(OntologyTerm::isExcluded)) + .toList(); + } + + default List getExcludedFeatures(List oterms) { + return oterms.stream() + .filter(OntologyTerm::isExcluded) + + .toList(); + } + default Set getMissingTranslations() { return Set.of(); } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java index d7b3dd6..48ce39c 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java @@ -52,7 +52,7 @@ public String formatFeatures(List ontologyTerms) { 
public String getVignetteAtAge(PhenopacketAge page, PhenopacketSex psex, List terms) { String ageString = this.ppktAgeSexGenerator.atAge(page); String features = formatFeatures(terms); - return String.format("%s, %s presentó %s", ageString, ppktAgeSexGenerator.heSheIndividual(psex), features); + return String.format("%s, präsentierte %s mit mit den folgenden Symptomen: %s", ageString, ppktAgeSexGenerator.heSheIndividual(psex), features); } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java index 557f813..215db3c 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java @@ -111,24 +111,26 @@ public String ageAndSexAtLastExamination(PpktIndividual individual) { } - private String atIsoAgeExact(PhenopacketAge ppktAge) { + private String imAlterVonIsoAgeExact(PhenopacketAge ppktAge) { Iso8601Age iso8601Age = (Iso8601Age) ppktAge; int y = iso8601Age.getYears(); int m = iso8601Age.getMonths(); int d = iso8601Age.getDays(); if (y > 10) { - return String.format("%d Jahre", y); + return String.format("Im Alter von %d %s", y, y>1?"Jahren" : "Jahr"); } else if (y > 0) { - if (m > 1) { - return String.format("%d Jahre und %d Monate", y, m); - } else if (m == 1) { - return String.format("%d Jahre und ein Monat", y); + if (m > 0) { + return String.format("Im Alter von %d %s und %d S", y, + y>1?"Jahren" : "Jahr", + m, m>1?"Monaten" : "Monat"); } else { - return String.format("%d Jahre", y); + return String.format("Im Alter von %d %s", y, y>1?"Jahren" : "Jahr"); } - } else if (m>0) { - return String.format("%d Monate y %d Tage", m, d); + } + if (m>0) { + return String.format("Im Alter von %d %s y %d %s", m, m, m>1?"Monaten" : "Monat", + d, d>1?"Tagen" : "Tag"); 
} else { return String.format("%d Tage", d); } @@ -137,6 +139,9 @@ private String atIsoAgeExact(PhenopacketAge ppktAge) { @Override public String getIndividualDescription(PpktIndividual individual) { + if (individual.annotationCount() == 0) { + throw new PhenolRuntimeException("No HPO annotations"); + } Optional lastExamOpt = individual.getAgeAtLastExamination(); Optional onsetOpt = individual.getAgeAtOnset(); PhenopacketSex psex = individual.getSex(); @@ -152,12 +157,35 @@ public String getIndividualDescription(PpktIndividual individual) { } - private String iso8601ToYearMonth(Iso8601Age iso8601Age) { + private String iso8601ToYearMonth(Iso8601Age iso8601Age, PhenopacketSex psex) { + int y = iso8601Age.getYears(); + int m = iso8601Age.getMonths(); + int d = iso8601Age.getDays(); + if (psex.equals(PhenopacketSex.MALE)) { + if (iso8601Age.getMonths() == 0) { + return String.format("ein %djähriger Junge", y); + } else { + return String.format("ein %d %s, %d %s alter Junge", y, y>1?"Jahre":"Jahr", m, m>1?"Monate":"Monat"); + } + } else if (psex.equals(PhenopacketSex.FEMALE)) { + if (iso8601Age.getMonths() == 0) { + return String.format("ein %djähriges Mädchen", y); + } else { + return String.format("ein %d %s, %d %s altes Mädchen", y, y>1?"Jahre":"Jahr", m, m>1?"Monate":"Monat"); + } + } if (iso8601Age.getMonths() == 0) { - return String.format("de %d años", iso8601Age.getYears()); + return String.format("ein %djähriges Kind", y); } else { - return String.format("de %d años y %d meses", iso8601Age.getYears(), iso8601Age.getMonths()); - } + return String.format("ein %d %s, %d %s altes Kind", y, y>1?"Jahre":"Jahr", m, m>1?"Monate":"Monat"); } + } + + private String monthString(int m) { + return m>1 ? "Monate": "Monat"; + } + + private String dayString(int d) { + return d>1 ? 
"Tage": "Tag"; } private String iso8601ToMonthDay(Iso8601Age iso8601Age) { @@ -166,9 +194,9 @@ private String iso8601ToMonthDay(Iso8601Age iso8601Age) { if (m == 0) { return String.format("de %d dias", d); } else if (d>0){ - return String.format("de %d meses y %d dias", m, d); + return String.format("%d %s und %d %s", m, monthString(m), d, dayString(d)); } else { - return String.format("de %d meses", m); + return String.format("%d %s", m, m>1 ? "Monate": "Monat"); } } @@ -197,9 +225,9 @@ private String iso8601AtAgeOf(Iso8601Age isoAge) { components.add("einem Tag"); } if (components.isEmpty()) { - return "im Neugeborenen Alter"; + return "bei der Geburt"; } else if (components.size() == 1) { - return "im Alter von " + components.get(0); + return "im Alter von " + components.getFirst(); } else if (components.size() == 2) { return "im Alter von " + components.get(0) + " und " + components.get(1); } else { @@ -212,13 +240,13 @@ private String onsetTermAtAgeOf(HpoOnsetAge hpoOnsetTermAge) { if (hpoOnsetTermAge.isFetus()) { return "in der Fetalperiode"; } else if (hpoOnsetTermAge.isCongenital()) { - return "im Neugeborenenalter"; + return "bei der Geburt"; } else if (hpoOnsetTermAge.isInfant()) { return "im Säuglingsalter"; } else if (hpoOnsetTermAge.isChild()) { return "in der Kindheit"; } else if (hpoOnsetTermAge.isJuvenile()) { - return "como adolescente"; + return "als Jugendlich adolescente"; } else { return "im Erwachsenenalter"; } @@ -232,9 +260,9 @@ private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8 // if older if (y>17) { return switch (psex) { - case FEMALE -> String.format("eine %djährige Frau", y); - case MALE -> String.format("ein %djähriger Mann", y); - default -> String.format("eine %djährige Person", y); + case FEMALE -> String.format("eine %d-jährige Frau", y); + case MALE -> String.format("ein %d-jähriger Mann", y); + default -> String.format("eine %d-jährige Person", y); }; } else if (y>9) { return switch (psex) { @@ 
-244,15 +272,15 @@ private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8 }; } else if (y>0) { return switch (psex) { - case FEMALE -> String.format("niña %s", iso8601ToYearMonth(iso8601Age)); - case MALE -> String.format("niño %s", iso8601ToYearMonth(iso8601Age)); - default -> String.format("niño %s", iso8601ToYearMonth(iso8601Age)); + case FEMALE -> iso8601ToYearMonth(iso8601Age, psex); + case MALE -> iso8601ToYearMonth(iso8601Age, psex); + default -> iso8601ToYearMonth(iso8601Age, psex); }; } else if (m>0 || d> 0) { return switch (psex) { - case FEMALE -> String.format("una infante %s", iso8601ToMonthDay(iso8601Age)); - case MALE -> String.format("un infante %s", iso8601ToMonthDay(iso8601Age)); - default -> String.format("un infante %s", iso8601ToMonthDay(iso8601Age)); + case FEMALE -> String.format("ein %s alter weiblicher Säugling", iso8601ToMonthDay(iso8601Age)); + case MALE -> String.format("ein %s alter Säugling", iso8601ToMonthDay(iso8601Age)); + default -> String.format("ein %s alter Säugling", iso8601ToMonthDay(iso8601Age)); }; } else { return switch (psex) { @@ -272,9 +300,9 @@ private String hpoOnsetIndividualDescription(PhenopacketSex psex, HpoOnsetAge hp }; } else if (hpoOnsetTermAge.isCongenital()) { return switch (psex) { - case FEMALE -> "una niña recién nacida"; - case MALE -> "un niño recién nacido"; - default -> "un bebe recién nacido"; + case FEMALE -> "Die Patientin war ein weibliches Neugeborenes, das sich"; + case MALE -> "Der Patient war ein männliches Neugeborenes, das sich"; + default -> "Der Patient war ein Neugeborenes, das sich"; }; } else if (hpoOnsetTermAge.isInfant()) { return switch (psex) { @@ -336,7 +364,7 @@ private String onsetAndLastEncounterAvailable(PhenopacketSex psex, PhenopacketAg throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); } return switch (psex) { - case FEMALE -> String.format("Die Probandin war %s, der sich %s mit den folgenden Symptomen 
vorgestellt hat: ", individualDescription, onsetDescription); + case FEMALE -> String.format("Die Probandin war %s, die sich %s mit den folgenden Symptomen vorgestellt hat: ", individualDescription, onsetDescription); default -> String.format("Der Proband war %s, der sich %s mit den folgenden Symptomen vorgestellt hat: ", individualDescription, onsetDescription); }; } @@ -405,15 +433,15 @@ public String heSheIndividual(PhenopacketSex psex) { @Override public String atAge(PhenopacketAge ppktAge) { if (ppktAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { - return "im Alter von " + atIsoAgeExact(ppktAge); + return imAlterVonIsoAgeExact(ppktAge); } else if (ppktAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { String label = ppktAge.age(); // something like "Infantile onset" return switch (label) { - case "Infantile onset" -> "als Säugling"; - case "Childhood onset" -> "in der Kindheit"; - case "Neonatal onset" -> "in der neugeborenen Zeit"; - case "Congenital onset" -> "zum Zeitpunkt der Geburt"; - case "Adult onset" -> "im Erwachsenenalter"; + case "Infantile onset" -> "Als Säugling"; + case "Childhood onset" -> "In der Kindheit"; + case "Neonatal onset" -> "In der neugeborenen Zeit"; + case "Congenital onset" -> "Zum Zeitpunkt der Geburt"; + case "Adult onset" -> "Im Erwachsenenalter"; default-> String.format("TODO TODO el %s período", label.replace(" onset", "")); }; } else { diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktPhenotypicfeatureGerman.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktPhenotypicfeatureGerman.java index 7a58d68..c257aa8 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktPhenotypicfeatureGerman.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktPhenotypicfeatureGerman.java @@ -5,7 +5,6 @@ import 
org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; import java.util.*; -import java.util.function.Predicate; public class PpktPhenotypicfeatureGerman implements PpktPhenotypicFeatureGenerator { @@ -35,7 +34,7 @@ private List getTranslations(List ontologyTerms) { - private String getOxfordCommaList(List items) { + private String getCommaList(List items) { if (items.size() == 1) { return items.getFirst(); } @@ -53,31 +52,23 @@ private String getOxfordCommaList(List items) { @Override public String formatFeatures(List ontologyTerms) { - List observedTerms = ontologyTerms.stream() - .filter(Predicate.not(OntologyTerm::isExcluded)) - .toList(); + List observedTerms = getObservedFeatures(ontologyTerms); + List excludedTerms = getExcludedFeatures(ontologyTerms); List observedLabels = getTranslations(observedTerms); - List excludedTerms = ontologyTerms.stream() - .filter(OntologyTerm::isExcluded).toList(); List excludedLabels = getTranslations(excludedTerms); if (observedLabels.isEmpty() && excludedLabels.isEmpty()) { return "keine phänotypischen Abnormalitäten"; // should never happen, actually! } else if (excludedLabels.isEmpty()) { - return getOxfordCommaList(observedLabels) + ". "; + return getCommaList(observedLabels) + ". 
"; } else if (observedLabels.isEmpty()) { if (excludedLabels.size() > 1) { - return String.format("Die folgenden Symptome wurden ausgeschlossen %s.", getOxfordCommaList(excludedLabels)); + return String.format("%s wurden ausgeschlossen.", getCommaList(excludedLabels)); } else { - return String.format("%s wurde ausgeschlossen.",excludedLabels.get(0)); + return String.format("%s wurde ausgeschlossen.",excludedLabels.getFirst()); } } else { - String exclusion; - if (excludedLabels.size() == 1) { - exclusion = String.format(", und %s wurde ausgeschlossen.", getOxfordCommaList(excludedLabels)); - } else { - exclusion = String.format(", und %s wurden ausgeschlossen.", getOxfordCommaList(excludedLabels)); - } - return getOxfordCommaList(observedLabels) + exclusion; + String exclusion = String.format("Dagegen %s %s ausgeschlossen.", excludedLabels.size()>1? "wurden":"wurde", getCommaList(excludedLabels)); + return getCommaList(observedLabels) + ". " + exclusion; } } diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java index 46bcd88..a3e6c7a 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java @@ -12,6 +12,7 @@ import org.phenopackets.schema.v2.core.MetaData; import org.phenopackets.schema.v2.core.PhenotypicFeature; +import java.util.List; import java.util.function.Supplier; public class PPKtIndividualBase { @@ -23,7 +24,10 @@ public class PPKtIndividualBase { private final static PhenotypicFeature bradyphrenExcluded = PhenotypicFeatureBuilder.builder("HP:0031843", "Bradyphrenia").excluded().build(); private final static PhenotypicFeature polydactyly = PhenotypicFeatureBuilder.builder("HP:0100259", "Postaxial polydactyly").congenitalOnset().build(); private final static PhenotypicFeature hepatomegalyNoOnset 
= PhenotypicFeatureBuilder.builder("HP:0002240","Hepatomegaly").build(); - + private final static PhenotypicFeature lymphopenia = PhenotypicFeatureBuilder.builder("HP:0001888","Lymphopenia").iso8601onset("P3D").build(); + private final static PhenotypicFeature pneumonia = PhenotypicFeatureBuilder.builder("HP:0002090","Pneumonia").iso8601onset("P3D").build(); + private final static PhenotypicFeature igA = PhenotypicFeatureBuilder.builder("HP:0002720","Decreased circulating IgA level").iso8601onset("P3D").build(); + private final static PhenotypicFeature igM = PhenotypicFeatureBuilder.builder("HP:0002850","Decreased circulating total IgM").iso8601onset("P2Y").build(); public sealed interface TestOutcome { @@ -85,9 +89,21 @@ public static PpktIndividual unknownSex4MonthOnset() { } +/* +Der Proband war niño de 2 años, der sich im Alter von 3 Tagen mit den folgenden Symptomen vorgestellt hat: +Lymphopenia, Pneumonia und Severe combined immunodeficiency. im Alter von 1 Monate y 0 Tage, er presentó Decreased lymphocyte proliferation in response to mitogen, Decreased circulating IgA level und Decreased circulating total IgM. 
+ */ + public static PpktIndividual twoYears() { + PhenopacketBuilder builder = PhenopacketBuilder.create("id6", metadata); + Disease d = DiseaseBuilder.builder("OMIM:100123", "test").onset(TimeElements.age("P3D")).build(); + Individual subject = IndividualBuilder.builder("individual.6").male().ageAtLastEncounter("P2Y").build(); + var features = List.of(lymphopenia, pneumonia, igA, igM); + builder.individual(subject).addDisease(d).addPhenotypicFeatures(features); + return new PpktIndividual(builder.build()); + } diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java new file mode 100644 index 0000000..afde6be --- /dev/null +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java @@ -0,0 +1,46 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.german; + +import org.junit.jupiter.api.Test; +import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.monarchinitiative.phenol.io.OntologyLoader; +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenopacket2prompt.international.HpInternational; +import org.monarchinitiative.phenopacket2prompt.international.HpInternationalOboParser; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator; + +import java.io.File; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase.twoYears; + +public class GermanPromptGeneratorTest { + + + @Test + public void testCase() { + PpktIndividual i = twoYears(); + File hpJsonFile = new File("data/hp.json"); + if (! 
hpJsonFile.isFile()) { + throw new PhenolRuntimeException("Could not find hp.json at " + hpJsonFile.getAbsolutePath()); + } + Ontology hpo = OntologyLoader.loadOntology(hpJsonFile); + File translationsFile = new File("data/hp-international.obo"); + if (! translationsFile.isFile()) { + System.err.printf("Could not find translations file at %s. Try download command", translationsFile.getAbsolutePath()); + return ; + } + HpInternationalOboParser oboParser = new HpInternationalOboParser(translationsFile); + Map internationalMap = oboParser.getLanguageToInternationalMap(); + PromptGenerator german = PromptGenerator.german(hpo, internationalMap.get("de")); + String prompt = german.createPrompt(twoYears()); + + + System.out.println(prompt); + assertEquals("asdf", prompt); + } + + + +} diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGermanTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGermanTest.java index 3044918..40cdc9e 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGermanTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGermanTest.java @@ -7,7 +7,6 @@ import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase; import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; -import org.monarchinitiative.phenopacket2prompt.output.impl.spanish.PpktIndividualSpanish; import java.util.function.Supplier; import java.util.stream.Stream; @@ -22,16 +21,16 @@ public class PpktIndividualGermanTest extends PPKtIndividualBase{ private static Stream testGetIndividualDescription() { return Stream.of( new TestIndividual("46 year olf female, infantile onset", - female46yearsInfantileOnset(), new TestOutcome.Ok("Die Patientin war eine 46jährige Frau, die sich im 
Säuglingsalter mit den folgenden Symptomen vorgestellt hat:")), - new TestIndividual("male 4 months, congenital onset", - male4monthsCongenitalOnset(), new TestOutcome.Ok("Der Patient war ein 4 Monate alter Säugling, der sich zum Zeitpunt der Geburt mit den folgenden Symptomen vorgestellt hart paciente era un bebé de 4 meses que se presentaba al nacer con")), - new TestIndividual("female, no onset", - femaleNoAge(), new TestOutcome.Ok("La paciente se presentaba con")), - new TestIndividual("female, no HPOs", + female46yearsInfantileOnset(), new TestOutcome.Ok("Die Probandin war eine 46-jährige Frau, die sich im Säuglingsalter mit den folgenden Symptomen vorgestellt hat: ")), + new TestIndividual("male 4 months, congenital onset", + male4monthsCongenitalOnset(), new TestOutcome.Ok("Der Proband war ein 4 Monate alter Säugling, der sich bei der Geburt mit den folgenden Symptomen vorgestellt hat: ")), + new TestIndividual("female, no onset", + femaleNoAge(), new TestOutcome.Ok("Die Patientin stellte sich mit den folgenden Symptomen vor: ")), + new TestIndividual("female, no HPOs", femaleNoHPOs(), new TestOutcome.Error(() -> new PhenolRuntimeException("No HPO annotations"))), new TestIndividual("unknown sex, no 4mo", - unknownSex4MonthOnset(), new TestOutcome.Ok("El paciente se presentaba en la niñez con")) - ); + unknownSex4MonthOnset(), new TestOutcome.Ok("Der Patient stellte sich in der Kindheit mit den folgenden Symptomen vor: ")) + ); } @@ -85,13 +84,13 @@ void testPPKtSex(TestIdvlHeShe testCase) { private static Stream testIndlAtAge() { return Stream.of( new TestIdvlAtAge("congenital", - congenital, new TestOutcome.Ok("zum Zeitpunkt der Geburt")), + congenital, new TestOutcome.Ok("Zum Zeitpunkt der Geburt")), new TestIdvlAtAge("infantile", - infantile, new TestOutcome.Ok("als Säugling")), + infantile, new TestOutcome.Ok("Als Säugling")), new TestIdvlAtAge("childhood age", - childhood, new TestOutcome.Ok("in der Kindheit")), + childhood, new TestOutcome.Ok("In der 
Kindheit")), new TestIdvlAtAge("46 years old", - p46y, new TestOutcome.Ok("im Alter von 46 Jahren")) + p46y, new TestOutcome.Ok("Im Alter von 46 Jahren")) ); } @@ -108,13 +107,9 @@ void testPPKtSex(TestIdvlAtAge testCase) { () -> generator.atAge(testCase.ppktAge()), "Incorrect error handling for: " + testCase.description()); } - - } - - } From eb125e14e9f8ca7ffb2b62dc5fcf91075082eff6 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Sat, 1 Jun 2024 17:41:53 +0200 Subject: [PATCH 27/39] revising German translation --- .../cmd/GptTranslateCommand.java | 32 +++++----- .../impl/german/PpktIndividualGerman.java | 58 +++++++++---------- 2 files changed, 47 insertions(+), 43 deletions(-) diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java index f91d085..c8ff10f 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java @@ -34,6 +34,9 @@ public class GptTranslateCommand implements Callable { @CommandLine.Option(names = {"-p", "--ppkt"}, description = "Path to JSON phenopacket file", required = true) private String ppkt; + @CommandLine.Option(names = {"-l", "--language"}, description = "Language code", defaultValue = "de") + private String languageCode; + @Override public Integer call() throws Exception { @@ -58,21 +61,22 @@ public Integer call() throws Exception { PpktIndividual individual = PpktIndividual.fromFile(new File(ppkt)); String prompt = generator.createPrompt(individual); System.out.println(prompt); - // SPANISH - System.out.println("SPANISH"); - PromptGenerator spanish = PromptGenerator.spanish(hpo, internationalMap.get("es")); - prompt = spanish.createPrompt(individual); - System.out.println(prompt); - - // DUTCH - System.out.println("DUTCH"); - PromptGenerator dutch = PromptGenerator.dutch(hpo, 
internationalMap.get("nl")); - prompt = dutch.createPrompt(individual); + switch (languageCode) { + case "de" -> { + PromptGenerator german = PromptGenerator.german(hpo, internationalMap.get("de")); + prompt = german.createPrompt(individual); + } + case "es" -> { + PromptGenerator spanish = PromptGenerator.spanish(hpo, internationalMap.get("es")); + prompt = spanish.createPrompt(individual); + } + case "nl" -> { + PromptGenerator dutch = PromptGenerator.dutch(hpo, internationalMap.get("nl")); + prompt = dutch.createPrompt(individual); + } + default -> prompt = "did not recognize language code " + languageCode; + } - // GERMAN - System.out.println("GERMAN"); - PromptGenerator german = PromptGenerator.spanish(hpo, internationalMap.get("de")); - prompt = german.createPrompt(individual); System.out.println(prompt); diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java index 215db3c..6411315 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java @@ -148,7 +148,7 @@ public String getIndividualDescription(PpktIndividual individual) { if (lastExamOpt.isPresent() && onsetOpt.isPresent()) { return onsetAndLastEncounterAvailable(psex, lastExamOpt.get(), onsetOpt.get()); } else if (lastExamOpt.isPresent()) { - return lastEncounterAvailable(psex, lastExamOpt.get()); + return latestEncounterAvailable(psex, lastExamOpt.get()); } else if (onsetOpt.isPresent()) { return onsetAvailable(psex, onsetOpt.get()); } else { @@ -260,15 +260,15 @@ private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8 // if older if (y>17) { return switch (psex) { - case FEMALE -> String.format("eine %d-jährige Frau", y); - case MALE -> String.format("ein 
%d-jähriger Mann", y); - default -> String.format("eine %d-jährige Person", y); + case FEMALE -> String.format("Die Patientin war eine %d-jährige Frau", y); + case MALE -> String.format("Der Patient war ein %d-jähriger Mann", y); + default -> String.format("Der Patient war eine %d-jährige Person", y); }; } else if (y>9) { return switch (psex) { - case FEMALE -> String.format("una adolescente de %d años", y); - case MALE -> String.format("un adolescente de %d años", y); - default -> String.format("un adolescente de %d años", y); + case FEMALE -> String.format("Die Patientin war eine %d-jährige Jugendliche", y); + case MALE -> String.format("Der Patient war ein %d-jähriger Jugendlicher", y); + default -> String.format("Der Patient war ein %d-jähriger Jugendlicher", y); }; } else if (y>0) { return switch (psex) { @@ -278,15 +278,15 @@ private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8 }; } else if (m>0 || d> 0) { return switch (psex) { - case FEMALE -> String.format("ein %s alter weiblicher Säugling", iso8601ToMonthDay(iso8601Age)); - case MALE -> String.format("ein %s alter Säugling", iso8601ToMonthDay(iso8601Age)); - default -> String.format("ein %s alter Säugling", iso8601ToMonthDay(iso8601Age)); + case FEMALE -> String.format("Die Patientin war ein %s alter weiblicher Säugling", iso8601ToMonthDay(iso8601Age)); + case MALE -> String.format("Der Patient war ein %s alter Säugling", iso8601ToMonthDay(iso8601Age)); + default -> String.format("Der Patient war ein %s alter Säugling", iso8601ToMonthDay(iso8601Age)); }; } else { return switch (psex) { - case FEMALE -> "recien nacida"; - case MALE -> "recien nacido"; - default -> "recien nacido"; + case FEMALE -> "Die Patientin war ein weibliches Neugeborenes"; + case MALE -> "Der Patient war ein männliches Neugeborenes"; + default -> "Der Patient war ein Neugeborenes"; }; } } @@ -306,27 +306,27 @@ private String hpoOnsetIndividualDescription(PhenopacketSex psex, HpoOnsetAge hp }; } else if 
(hpoOnsetTermAge.isInfant()) { return switch (psex) { - case FEMALE -> FEMALE_INFANT; - case MALE -> MALE_INFANT; - default -> INFANT; + case FEMALE -> "Die Patientin war ein weiblicher Säugling, der sich "; + case MALE -> "Der Patient war ein männlicher Säugling, der sich"; + default -> "Der Patient war ein Säugling, der sich"; }; } else if (hpoOnsetTermAge.isChild()) { return switch (psex) { - case FEMALE -> "niña"; - case MALE -> "niño"; - default -> "niño"; + case FEMALE -> "Die Patientin war ein Mädchen, das sich "; + case MALE -> "Der Patient war ein Junge, der sich"; + default -> "Der Patient war ein Kind, das sich"; }; } else if (hpoOnsetTermAge.isJuvenile()) { return switch (psex) { - case FEMALE -> "una adolescente femenina"; - case MALE -> "un adolescente masculino"; - default -> "un adolescente"; + case FEMALE -> "Die Patientin war eine Jugendliche, die sich"; + case MALE -> "Der Patient war ein Jugendlicher, der sich"; + default -> "Der Patient war ein Jugendlicher, der sich"; }; }else { return switch (psex) { - case FEMALE -> "eine Frau"; - case MALE -> "ein Mann"; - default -> "eine Person"; + case FEMALE -> "Die Patientin war eine Frau, die sich"; + case MALE -> "Der Patient war ein Mann, der sich"; + default -> "Der Patient war eine erwachsene Person nicht angegebenen Geschlechtes, die sich"; }; } } @@ -364,8 +364,8 @@ private String onsetAndLastEncounterAvailable(PhenopacketSex psex, PhenopacketAg throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); } return switch (psex) { - case FEMALE -> String.format("Die Probandin war %s, die sich %s mit den folgenden Symptomen vorgestellt hat: ", individualDescription, onsetDescription); - default -> String.format("Der Proband war %s, der sich %s mit den folgenden Symptomen vorgestellt hat: ", individualDescription, onsetDescription); + case FEMALE -> String.format("%s, die sich %s mit den folgenden Symptomen vorgestellt hat: ", individualDescription, onsetDescription); 
+ default -> String.format("%s, der sich %s mit den folgenden Symptomen vorgestellt hat: ", individualDescription, onsetDescription); }; } @@ -376,7 +376,7 @@ private String onsetAndLastEncounterAvailable(PhenopacketSex psex, PhenopacketAg * @param psex * @param lastExamAge */ - private String lastEncounterAvailable(PhenopacketSex psex, PhenopacketAge lastExamAge) { + private String latestEncounterAvailable(PhenopacketSex psex, PhenopacketAge lastExamAge) { String individualDescription; if (lastExamAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { Iso8601Age isoAge = (Iso8601Age) lastExamAge; @@ -388,7 +388,7 @@ private String lastEncounterAvailable(PhenopacketSex psex, PhenopacketAge lastEx // should never happen throw new PhenolRuntimeException("Did not recognize last exam age type " + lastExamAge.ageType()); } - return String.format("El paciente era %s quien se presentó con", individualDescription); + return String.format("%s", individualDescription); } /** From f1376cdd58247dd014a17b6629456ad62811bd56 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Sat, 1 Jun 2024 17:51:51 +0200 Subject: [PATCH 28/39] output missing Italian translation --- pom.xml | 2 +- .../output/impl/german/PpktIndividualGerman.java | 13 +++++++++---- .../impl/italian/ItalianPromptGenerator.java | 16 ++++++---------- .../italian/PpktPhenotypicfeatureItalian.java | 16 +++++++++++----- 4 files changed, 27 insertions(+), 20 deletions(-) diff --git a/pom.xml b/pom.xml index 4a329b1..d78f0b4 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.monarchinitiative phenopacket2prompt - 0.3.15 + 0.3.16 phenopacket2prompt https://github.com/monarch-initiative/phenopacket2prompt diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java index 6411315..a749553 100644 --- 
a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java @@ -118,10 +118,10 @@ private String imAlterVonIsoAgeExact(PhenopacketAge ppktAge) { int d = iso8601Age.getDays(); if (y > 10) { - return String.format("Im Alter von %d %s", y, y>1?"Jahren" : "Jahr"); + return String.format("Im Alter von %d Jahren", y); } else if (y > 0) { if (m > 0) { - return String.format("Im Alter von %d %s und %d S", y, + return String.format("Im Alter von %d %s und %d %s", y, y>1?"Jahren" : "Jahr", m, m>1?"Monaten" : "Monat"); } else { @@ -129,7 +129,7 @@ private String imAlterVonIsoAgeExact(PhenopacketAge ppktAge) { } } if (m>0) { - return String.format("Im Alter von %d %s y %d %s", m, m, m>1?"Monaten" : "Monat", + return String.format("Im Alter von %d %s y %d %s", m, m>1?"Monaten" : "Monat", d, d>1?"Tagen" : "Tag"); } else { return String.format("%d Tage", d); @@ -388,7 +388,12 @@ private String latestEncounterAvailable(PhenopacketSex psex, PhenopacketAge last // should never happen throw new PhenolRuntimeException("Did not recognize last exam age type " + lastExamAge.ageType()); } - return String.format("%s", individualDescription); + if (psex.equals(PhenopacketSex.FEMALE)) { + return String.format("%s, die sich mit den folgenden Symptomen vorgestellt hat: ", individualDescription); + } else { + return String.format("%s, der sich mit den folgenden Symptomen vorgestellt hat: ", individualDescription); + } + } /** diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGenerator.java index 3ceca76..6016570 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGenerator.java +++ 
b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGenerator.java @@ -11,12 +11,10 @@ import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator; import java.util.List; +import java.util.Set; public class ItalianPromptGenerator implements PromptGenerator { - private final Ontology hpo; - - private final PPKtIndividualInfoGenerator ppktAgeSexGenerator; private final PhenopacketTextGenerator ppktTextGenerator; @@ -25,8 +23,7 @@ public class ItalianPromptGenerator implements PromptGenerator { - public ItalianPromptGenerator(Ontology hpo, PpktPhenotypicFeatureGenerator pfgen) { - this.hpo = hpo; + public ItalianPromptGenerator(PpktPhenotypicFeatureGenerator pfgen) { ppktAgeSexGenerator = new PpktIndividualItalian(); ppktTextGenerator = new PpktTextItalian(); this.ppktPhenotypicFeatureGenerator = pfgen; @@ -54,10 +51,9 @@ public String getVignetteAtAge(PhenopacketAge page, PhenopacketSex psex, List getMissingTranslations() { + return this.ppktPhenotypicFeatureGenerator.getMissingTranslations(); + } } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktPhenotypicfeatureItalian.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktPhenotypicfeatureItalian.java index 2e0f187..1749941 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktPhenotypicfeatureItalian.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktPhenotypicfeatureItalian.java @@ -4,18 +4,20 @@ import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; -import java.util.Set; +import java.util.*; import java.util.function.Predicate; public class PpktPhenotypicfeatureItalian implements PpktPhenotypicFeatureGenerator { private final 
HpInternational italian; + private Set missingTranslations; + + + public PpktPhenotypicfeatureItalian(HpInternational international) { italian = international; + missingTranslations = new HashSet<>(); } @@ -27,7 +29,8 @@ private List getTranslations(List ontologyTerms) { if (opt.isPresent()) { labels.add(opt.get()); } else { - System.err.printf("[ERROR] Could not find %s translation for %s (%s).\n", italian.getLanguageAcronym(), term.getLabel(), term.getTid().getValue()); + String missing = String.format(" %s (%s)", term.getLabel(), term.getTid().getValue()); + missingTranslations.add(missing); } } return labels; @@ -87,4 +90,7 @@ public String formatFeatures(List ontologyTerms) { return getOxfordCommaList(observedLabels) + exclusion; } } + public Set getMissingTranslations() { + return missingTranslations; + } } From fe01eb46c37e9c91842bfd092f761eb97e75e162 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Thu, 6 Jun 2024 11:09:20 +0200 Subject: [PATCH 29/39] more testing --- .../cmd/GbtTranslateBatchCommand.java | 4 ++-- .../cmd/GptTranslateCommand.java | 2 +- .../output/PromptGenerator.java | 8 +++---- .../impl/german/GermanPromptGenerator.java | 7 +----- .../impl/spanish/PpktIndividualSpanish.java | 22 +++++++++++-------- .../german/GermanPromptGeneratorTest.java | 2 +- 6 files changed, 22 insertions(+), 23 deletions(-) diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java index 61604fd..438eb16 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java @@ -78,12 +78,12 @@ public Integer call() throws Exception { outputPromptsInternational(ppktFiles, hpo, "nl", dutch); // GERMAN resetOutput("de"); - PromptGenerator german = PromptGenerator.german(hpo, internationalMap.get("de")); + 
PromptGenerator german = PromptGenerator.german(internationalMap.get("de")); outputPromptsInternational(ppktFiles, hpo, "de", german); // ITALIAN resetOutput("it"); - PromptGenerator italian = PromptGenerator.italian(hpo, internationalMap.get("it")); + PromptGenerator italian = PromptGenerator.italian(internationalMap.get("it")); outputPromptsInternational(ppktFiles, hpo, "it", italian); resetOutput("finished"); diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java index c8ff10f..542d166 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java @@ -63,7 +63,7 @@ public Integer call() throws Exception { System.out.println(prompt); switch (languageCode) { case "de" -> { - PromptGenerator german = PromptGenerator.german(hpo, internationalMap.get("de")); + PromptGenerator german = PromptGenerator.german(internationalMap.get("de")); prompt = german.createPrompt(individual); } case "es" -> { diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java index cc42783..a000fca 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java @@ -45,13 +45,13 @@ static PromptGenerator dutch(Ontology hpo, HpInternational international) { return new DutchPromptGenerator(hpo, pfgen); } - static PromptGenerator german(Ontology hpo, HpInternational international) { + static PromptGenerator german(HpInternational international) { PpktPhenotypicFeatureGenerator pfgen = new PpktPhenotypicfeatureGerman(international); - return new GermanPromptGenerator(hpo, pfgen); + return new 
GermanPromptGenerator(pfgen); } - static PromptGenerator italian(Ontology hpo, HpInternational international) { + static PromptGenerator italian(HpInternational international) { PpktPhenotypicFeatureGenerator pfgen = new PpktPhenotypicfeatureItalian(international); - return new ItalianPromptGenerator(hpo, pfgen); + return new ItalianPromptGenerator(pfgen); } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java index 48ce39c..74c700c 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java @@ -1,6 +1,5 @@ package org.monarchinitiative.phenopacket2prompt.output.impl.german; -import org.monarchinitiative.phenol.ontology.data.Ontology; import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; @@ -15,9 +14,6 @@ public class GermanPromptGenerator implements PromptGenerator { - private final Ontology hpo; - - private final PPKtIndividualInfoGenerator ppktAgeSexGenerator; private final PhenopacketTextGenerator ppktTextGenerator; @@ -26,8 +22,7 @@ public class GermanPromptGenerator implements PromptGenerator { - public GermanPromptGenerator(Ontology hpo, PpktPhenotypicFeatureGenerator pfgen) { - this.hpo = hpo; + public GermanPromptGenerator(PpktPhenotypicFeatureGenerator pfgen) { ppktAgeSexGenerator = new PpktIndividualGerman(); ppktTextGenerator = new PpktTextGerman(); this.ppktPhenotypicFeatureGenerator = pfgen; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java 
b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java index 0f91cfe..383cec3 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java @@ -321,7 +321,7 @@ private String hpoOnsetIndividualDescription(PhenopacketSex psex, HpoOnsetAge hp /** * A sentence such as The proband was a 39-year old woman who presented at the age of 12 years with * HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. This method returns the phrase that ends with "with" - * El sujeto era un niño de 1 año y 10 meses que se presentaba como recién nacido con un filtrum largo. + * El sujeto era un niño de 1 año y 10 meses que se presentó como recién nacido con un filtrum largo. * @param psex * @param lastExamAge * @param onsetAge @@ -351,9 +351,9 @@ private String onsetAndLastEncounterAvailable(PhenopacketSex psex, PhenopacketAg throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); } return switch (psex){ - case FEMALE -> String.format("La paciente era %s que se presentaba %s con", individualDescription, onsetDescription); - case MALE -> String.format("El paciente era %s que se presentaba %s con", individualDescription, onsetDescription); - default -> String.format("El paciente era %s que se presentaba %s con", individualDescription, onsetDescription); + case FEMALE -> String.format("La paciente era %s que se presentó %s con", individualDescription, onsetDescription); + case MALE -> String.format("El paciente era %s que se presentó %s con", individualDescription, onsetDescription); + default -> String.format("El paciente era %s que se presentó %s con", individualDescription, onsetDescription); }; } @@ -379,7 +379,11 @@ private String lastEncounterAvailable(PhenopacketSex psex, PhenopacketAge lastEx // should never happen throw new 
PhenolRuntimeException("Did not recognize last exam age type " + lastExamAge.ageType()); } - return String.format("El paciente era %s quien se presentaba con", individualDescription); + if (psex.equals(PhenopacketSex.FEMALE)) { + return String.format("La paciente era %s que se presentó con", individualDescription); + } else { + return String.format("El paciente era %s qui se presentó con", individualDescription); + } } /** @@ -401,14 +405,14 @@ private String onsetAvailable(PhenopacketSex psex, PhenopacketAge onsetAge) { // should never happen throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); } - return String.format("El paciente se presentaba %s con", onsetDescription); + return String.format("El paciente se presentó %s con", onsetDescription); } private String ageNotAvailable(PhenopacketSex psex) { return switch (psex) { - case FEMALE -> "La paciente se presentaba con"; - case MALE -> "El paciente se presentaba con"; - default -> "El paciente se presentaba con"; + case FEMALE -> "La paciente se presentó con"; + case MALE -> "El paciente se presentó con"; + default -> "El paciente se presentó con"; }; } diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java index afde6be..4363a91 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java @@ -33,7 +33,7 @@ public void testCase() { } HpInternationalOboParser oboParser = new HpInternationalOboParser(translationsFile); Map internationalMap = oboParser.getLanguageToInternationalMap(); - PromptGenerator german = PromptGenerator.german(hpo, internationalMap.get("de")); + PromptGenerator german = PromptGenerator.german(internationalMap.get("de")); 
String prompt = german.createPrompt(twoYears()); From 4ce6679685a0e550b64b4a81905255ac69a1b8c6 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Thu, 6 Jun 2024 11:56:21 +0200 Subject: [PATCH 30/39] more testing --- .../international/HpInternationalOboParser.java | 8 ++++++-- .../impl/german/GermanPromptGenerator.java | 5 ++++- .../impl/german/PpktIndividualGerman.java | 8 +++++--- .../impl/german/GermanPromptGeneratorTest.java | 17 +++++++++++++---- 4 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java index 7a35d1a..e674af7 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java @@ -27,7 +27,7 @@ public class HpInternationalOboParser { * @return in this case "tr" */ public Optional getLanguage(String annots) { - final String translation = "translation:language=\"(\\w{2,2})\""; + final String translation = "translation:language=\"(\\w{2,3})\""; final Pattern pattern = Pattern.compile(translation); Matcher matcher = pattern.matcher(annots); if (matcher.find()) { @@ -53,7 +53,7 @@ public Optional getTranslation(String annots) { public HpInternationalOboParser(File file) { languageToInternationalMap = new HashMap<>(); String pattern = "id: (HP:\\d{7,7})"; - Set acronyms = Set.of("cs", "en", "de", "it", "es", "fr", "ja", "nl", "nna", "tr", "tw", "zh"); + Set acronyms = Set.of("cs", "en", "de", "dtp", "it", "es", "fr", "ja", "nl", "nna", "tr", "tw", "zh"); for (String acronym : acronyms) { languageToInternationalMap.put(acronym, new HpInternational(acronym)); } @@ -84,6 +84,10 @@ public HpInternationalOboParser(File file) { Optional opt = getLanguage(annots); if (opt.isPresent()) { String language = 
opt.get(); + if (! languageToInternationalMap.containsKey(language)) { + System.err.println("[ERROR] Could not find language \"" + language + "\""); + continue; + } languageToInternationalMap.get(language).addTerm(currentHpoTermId, hpoLabel); } else { System.err.printf("[ERROR] Could not extract language for %s.\n", line); diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java index 74c700c..d80869f 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java @@ -28,6 +28,9 @@ public GermanPromptGenerator(PpktPhenotypicFeatureGenerator pfgen) { this.ppktPhenotypicFeatureGenerator = pfgen; } + + + @Override public String queryHeader() { return ppktTextGenerator.QUERY_HEADER(); @@ -47,7 +50,7 @@ public String formatFeatures(List ontologyTerms) { public String getVignetteAtAge(PhenopacketAge page, PhenopacketSex psex, List terms) { String ageString = this.ppktAgeSexGenerator.atAge(page); String features = formatFeatures(terms); - return String.format("%s, präsentierte %s mit mit den folgenden Symptomen: %s", ageString, ppktAgeSexGenerator.heSheIndividual(psex), features); + return String.format("%s, präsentierte %s mit den folgenden Symptomen: %s", ageString, ppktAgeSexGenerator.heSheIndividual(psex), features); } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java index a749553..ccbdc7d 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java +++ 
b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java @@ -1,5 +1,6 @@ package org.monarchinitiative.phenopacket2prompt.output.impl.german; +import com.sun.source.tree.BreakTree; import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenopacket2prompt.model.*; import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; @@ -272,9 +273,10 @@ private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8 }; } else if (y>0) { return switch (psex) { - case FEMALE -> iso8601ToYearMonth(iso8601Age, psex); - case MALE -> iso8601ToYearMonth(iso8601Age, psex); - default -> iso8601ToYearMonth(iso8601Age, psex); + case FEMALE -> String.format("Die Patientin war %s", iso8601ToYearMonth(iso8601Age, psex)); + case MALE -> + String.format("Der Patient war %s", iso8601ToYearMonth(iso8601Age, psex)); + default -> String.format("Der Patient war %s", iso8601ToYearMonth(iso8601Age, psex)); }; } else if (m>0 || d> 0) { return switch (psex) { diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java index 4363a91..d142b28 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java @@ -17,7 +17,19 @@ public class GermanPromptGeneratorTest { + private final static String case_vignette = """ +Ich führe ein Experiment mit einem klinischen Fallbericht durch, um zu sehen, wie sich Ihre Diagnosen mit denen menschlicher Experten vergleichen lassen. Ich werde Ihnen einen Teil eines medizinischen Falles vorstellen. Sie versuchen nicht, irgendwelche Patienten zu behandeln. In diesem Fall sind Sie „Dr. 
GPT-4“, ein KI-Sprachmodell, das eine Diagnose liefert. Hier sind einige Richtlinien. Erstens gibt es eine einzige definitive Diagnose, und es ist eine Diagnose, von der heute bekannt ist, dass sie beim Menschen existiert. Die Diagnose wird fast immer durch einen Gentest bestätigt. In seltenen Fällen, in denen ein solcher Test für eine Diagnose nicht existiert, kann die Diagnose jedoch anhand validierter klinischer Kriterien gestellt oder in sehr seltenen Fällen einfach durch eine Expertenmeinung bestätigt werden. Nachdem Sie den Fall gelesen haben, möchte ich, dass Sie eine Differentialdiagnose mit einer Liste von Kandidatendiagnosen stellen, die nach Wahrscheinlichkeit geordnet sind, beginnend mit dem wahrscheinlichsten Kandidaten. Jeder Kandidat sollte mit dem Krankheitsnamen angegeben werden. Wenn es sich bei dem ersten Kandidaten beispielsweise um das Branchiookulofaziale Syndrom und bei dem zweiten um Mukoviszidose handelt, geben Sie Folgendes in englischer Sprache an: +1. Branchiooculofacial syndrome +2. Cystic fibrosis + +Diese Liste sollte so viele Diagnosen enthalten, wie Sie für sinnvoll halten. + +Sie müssen Ihre Argumentation nicht erläutern, sondern nur die Diagnosen auflisten. +Ich habe Ihnen diese Anleitung auf English gegeben, aber ich bitte Sie, ihre Antwort ausschließlich auf English zu liefern. +Hier ist der Fall: + +Der Patient war ein 2jähriger Junge, der sich im Alter von 3 Tagen mit den folgenden Symptomen vorgestellt hat: Lymphopenie, Lungenentzündung und Verminderter zirkulierender IgA-Spiegel. 
Im Alter von 2 Jahren, präsentierte er mit den folgenden Symptomen: Verringertes zirkulierendes Gesamt-IgM."""; @Test public void testCase() { PpktIndividual i = twoYears(); @@ -35,10 +47,7 @@ public void testCase() { Map internationalMap = oboParser.getLanguageToInternationalMap(); PromptGenerator german = PromptGenerator.german(internationalMap.get("de")); String prompt = german.createPrompt(twoYears()); - - - System.out.println(prompt); - assertEquals("asdf", prompt); + assertEquals(case_vignette, prompt.trim()); } From 04b025c3f04f334339246dfefd27a414e7559b7d Mon Sep 17 00:00:00 2001 From: Leonardo macOS Date: Thu, 6 Jun 2024 15:13:15 +0200 Subject: [PATCH 31/39] resolved errors, mostly spaces, dots etc. One true error was a mixup of el and ella in spanish --- .../impl/spanish/PpktIndividualSpanish.java | 4 ++-- .../PpktPhenotypicFeatureEnglishTest.java | 4 ++-- .../impl/german/PpktIndividualGermanTest.java | 6 ++--- .../spanish/PpktIndividualSpanishTest.java | 24 +++++++++---------- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java index 383cec3..2b7a87e 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java @@ -419,8 +419,8 @@ private String ageNotAvailable(PhenopacketSex psex) { @Override public String heSheIndividual(PhenopacketSex psex) { return switch (psex) { - case FEMALE -> "el"; - case MALE -> "ella"; + case FEMALE -> "ella"; + case MALE -> "el"; default -> "la persona"; }; } diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglishTest.java 
b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglishTest.java index 73bf9c7..33d8f37 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglishTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglishTest.java @@ -22,9 +22,9 @@ public class PpktPhenotypicFeatureEnglishTest { private static Stream testGetIndividualPhenotypicFeatures() { return Stream.of( new PPKtIndividualBase.TestIndividual("46 year olf female, infantile onset", - female46yearsInfantileOnset(), new PPKtIndividualBase.TestOutcome.Ok("Cerebellar atrophy and Ataxia")), + female46yearsInfantileOnset(), new PPKtIndividualBase.TestOutcome.Ok("Cerebellar atrophy and Ataxia. ")), new PPKtIndividualBase.TestIndividual("male 4 months, congenital onset", - male4monthsCongenitalOnset(), new PPKtIndividualBase.TestOutcome.Ok("Postaxial polydactyly")) + male4monthsCongenitalOnset(), new PPKtIndividualBase.TestOutcome.Ok("Postaxial polydactyly. 
")) ); } diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGermanTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGermanTest.java index 40cdc9e..3c592d8 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGermanTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGermanTest.java @@ -20,10 +20,10 @@ public class PpktIndividualGermanTest extends PPKtIndividualBase{ private static Stream testGetIndividualDescription() { return Stream.of( - new TestIndividual("46 year olf female, infantile onset", - female46yearsInfantileOnset(), new TestOutcome.Ok("Die Probandin war eine 46-jährige Frau, die sich im Säuglingsalter mit den folgenden Symptomen vorgestellt hat: ")), + new TestIndividual("46 year old female, infantile onset", + female46yearsInfantileOnset(), new TestOutcome.Ok("Die Patientin war eine 46-jährige Frau, die sich im Säuglingsalter mit den folgenden Symptomen vorgestellt hat: ")), new TestIndividual("male 4 months, congenital onset", - male4monthsCongenitalOnset(), new TestOutcome.Ok("Der Proband war ein 4 Monate alter Säugling, der sich bei der Geburt mit den folgenden Symptomen vorgestellt hat: ")), + male4monthsCongenitalOnset(), new TestOutcome.Ok("Der Patient war ein 4 Monate alter Säugling, der sich bei der Geburt mit den folgenden Symptomen vorgestellt hat: ")), new TestIndividual("female, no onset", femaleNoAge(), new TestOutcome.Ok("Die Patientin stellte sich mit den folgenden Symptomen vor: ")), new TestIndividual("female, no HPOs", diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java index 3fb3c15..3017a6e 100644 --- 
a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java @@ -20,16 +20,16 @@ public class PpktIndividualSpanishTest extends PPKtIndividualBase{ private static Stream testGetIndividualDescription() { return Stream.of( - new TestIndividual("46 year olf female, infantile onset", - female46yearsInfantileOnset(), new TestOutcome.Ok("La paciente era mujer de 46 años que se presentaba en el primer año de vida con")), + new TestIndividual("46 year old female, infantile onset", + female46yearsInfantileOnset(), new TestOutcome.Ok("La paciente era mujer de 46 años que se presentó en el primer año de vida con")), new TestIndividual("male 4 months, congenital onset", - male4monthsCongenitalOnset(), new TestOutcome.Ok("El paciente era un bebé de 4 meses que se presentaba al nacer con")), + male4monthsCongenitalOnset(), new TestOutcome.Ok("El paciente era un bebé de 4 meses que se presentó al nacer con")), new TestIndividual("female, no onset", - femaleNoAge(), new TestOutcome.Ok("La paciente se presentaba con")), + femaleNoAge(), new TestOutcome.Ok("La paciente se presentó con")), new TestIndividual("female, no HPOs", femaleNoHPOs(), new TestOutcome.Error(() -> new PhenolRuntimeException("No HPO annotations"))), new TestIndividual("unknown sex, no 4mo", - unknownSex4MonthOnset(), new TestOutcome.Ok("El paciente se presentaba en la niñez con")) + unknownSex4MonthOnset(), new TestOutcome.Ok("El paciente se presentó en la niñez con")) ); } @@ -56,11 +56,11 @@ void testEvaluateExpression(TestIndividual testCase) { private static Stream testGetPPKtSex() { return Stream.of( new TestIdvlHeShe("female", - PhenopacketSex.FEMALE, new TestOutcome.Ok("she")), + PhenopacketSex.FEMALE, new TestOutcome.Ok("ella")), new TestIdvlHeShe("male", - PhenopacketSex.MALE, new TestOutcome.Ok("he")), + PhenopacketSex.MALE, new 
TestOutcome.Ok("el")), new TestIdvlHeShe("proband", - PhenopacketSex.UNKNOWN, new TestOutcome.Ok("the individual")) + PhenopacketSex.UNKNOWN, new TestOutcome.Ok("la persona")) ); } @@ -84,13 +84,13 @@ void testPPKtSex(TestIdvlHeShe testCase) { private static Stream testIndlAtAge() { return Stream.of( new TestIdvlAtAge("congenital", - congenital, new TestOutcome.Ok("At birth")), + congenital, new TestOutcome.Ok("Al nacer")), new TestIdvlAtAge("infantile", - infantile, new TestOutcome.Ok("During the infantile period")), + infantile, new TestOutcome.Ok("Durante el período infantil")), new TestIdvlAtAge("childhood age", - childhood, new TestOutcome.Ok("During childhood")), + childhood, new TestOutcome.Ok("Durante la infancia")), new TestIdvlAtAge("46 years old", - p46y, new TestOutcome.Ok("At an age of 46 years")) + p46y, new TestOutcome.Ok("A la edad de 46 años")) ); } From accf6231ea708f8f2a8af1bb052f661c2292c98f Mon Sep 17 00:00:00 2001 From: Leonardo macOS Date: Thu, 6 Jun 2024 14:42:20 +0200 Subject: [PATCH 32/39] set up italian testing --- .../italian/ItalianPromptGeneratorTest.java | 10 ++ .../italian/PpktIndividualItalianTest.java | 124 ++++++++++++++++++ .../PpktPhenotypicFeatureItalianTest.java | 50 +++++++ 3 files changed, 184 insertions(+) create mode 100644 src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGeneratorTest.java create mode 100644 src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalianTest.java create mode 100644 src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktPhenotypicFeatureItalianTest.java diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGeneratorTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGeneratorTest.java new file mode 100644 index 0000000..bcf0cdd --- /dev/null +++ 
b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGeneratorTest.java @@ -0,0 +1,10 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.italian; + +public class ItalianPromptGeneratorTest { + + + + + + +} diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalianTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalianTest.java new file mode 100644 index 0000000..295ca60 --- /dev/null +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalianTest.java @@ -0,0 +1,124 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.italian; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; +import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; +import org.monarchinitiative.phenopacket2prompt.output.impl.english.PpktIndividualEnglish; + +import java.util.function.Supplier; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +public class PpktIndividualItalianTest extends PPKtIndividualBase{ + + + + private static Stream testGetIndividualDescription() { + return Stream.of( + new TestIndividual("46 year olf female, infantile onset", + female46yearsInfantileOnset(), new TestOutcome.Ok("The proband was a 46-year old woman who presented as an infant with")), + new TestIndividual("male 4 months, congenital onset", + male4monthsCongenitalOnset(), new TestOutcome.Ok("The proband was a 4-month old male infant 
who presented at birth with")), + new TestIndividual("female, no onset", + femaleNoAge(), new TestOutcome.Ok("The proband was a female who presented with")), + new TestIndividual("female, no HPOs", + femaleNoHPOs(), new TestOutcome.Error(() -> new PhenolRuntimeException("No HPO annotations"))), + new TestIndividual("unknown sex, no 4mo", + unknownSex4MonthOnset(), new TestOutcome.Ok("The proband presented in childhood with")) + ); + } + + + + @ParameterizedTest + @MethodSource("testGetIndividualDescription") + void testEvaluateExpression(TestIndividual testCase) { + PPKtIndividualInfoGenerator generator = new PpktIndividualEnglish(); + PpktIndividual ppkti = testCase.ppktIndividual(); + switch (testCase.expectedOutcome()) { + case TestOutcome.Ok(String expectedResult) -> + assertEquals(expectedResult, generator.getIndividualDescription(ppkti), + "Incorrect evaluation for: " + testCase.description()); + case TestOutcome.Error(Supplier exceptionSupplier) -> + assertThrows(exceptionSupplier.get().getClass(), + () -> generator.getIndividualDescription(ppkti), + "Incorrect error handling for: " + testCase.description()); + } + } + + + + private static Stream testGetPPKtSex() { + return Stream.of( + new TestIdvlHeShe("female", + PhenopacketSex.FEMALE, new TestOutcome.Ok("she")), + new TestIdvlHeShe("male", + PhenopacketSex.MALE, new TestOutcome.Ok("he")), + new TestIdvlHeShe("proband", + PhenopacketSex.UNKNOWN, new TestOutcome.Ok("the individual")) + ); + } + + @ParameterizedTest + @MethodSource("testGetPPKtSex") + void testPPKtSex(TestIdvlHeShe testCase) { + PPKtIndividualInfoGenerator generator = new PpktIndividualEnglish(); + switch (testCase.expectedOutcome()) { + case TestOutcome.Ok(String expectedResult) -> + assertEquals(expectedResult, generator.heSheIndividual(testCase.ppktSex())); + case TestOutcome.Error(Supplier exceptionSupplier) -> + assertThrows(exceptionSupplier.get().getClass(), + () -> generator.heSheIndividual(testCase.ppktSex()), + "Incorrect error 
handling for: " + testCase.description()); + } + } + + + +//public record TestIdvlAtAge(String description, PhenopacketAge ppktAge, TestOutcome expectedOutcome) {} + + + + + private static Stream testIndlAtAge() { + return Stream.of( + new TestIdvlAtAge("congenital", + congenital, new TestOutcome.Ok("At birth")), + new TestIdvlAtAge("infantile", + infantile, new TestOutcome.Ok("During the infantile period")), + new TestIdvlAtAge("childhood age", + childhood, new TestOutcome.Ok("During childhood")), + new TestIdvlAtAge("46 years old", + p46y, new TestOutcome.Ok("At an age of 46 years")) + ); + } + + + @ParameterizedTest + @MethodSource("testIndlAtAge") + void testPPKtSex(TestIdvlAtAge testCase) { + PPKtIndividualInfoGenerator generator = new PpktIndividualEnglish(); + switch (testCase.expectedOutcome()) { + case TestOutcome.Ok(String expectedResult) -> + assertEquals(expectedResult, generator.atAge(testCase.ppktAge())); + case TestOutcome.Error(Supplier exceptionSupplier) -> + assertThrows(exceptionSupplier.get().getClass(), + () -> generator.atAge(testCase.ppktAge()), + "Incorrect error handling for: " + testCase.description()); + } + + + } + + + + + + +} diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktPhenotypicFeatureItalianTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktPhenotypicFeatureItalianTest.java new file mode 100644 index 0000000..8f20d42 --- /dev/null +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktPhenotypicFeatureItalianTest.java @@ -0,0 +1,50 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.italian; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase; +import 
org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; +import org.monarchinitiative.phenopacket2prompt.output.impl.english.EnglishPromptGenerator; +import org.monarchinitiative.phenopacket2prompt.output.impl.english.PpktIndividualEnglish; + +import java.util.function.Supplier; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase.female46yearsInfantileOnset; +import static org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase.male4monthsCongenitalOnset; + +public class PpktPhenotypicFeatureItalianTest { + + + + + private static Stream testGetIndividualPhenotypicFeatures() { + return Stream.of( + new PPKtIndividualBase.TestIndividual("46 year olf female, infantile onset", + female46yearsInfantileOnset(), new PPKtIndividualBase.TestOutcome.Ok("Cerebellar atrophy and Ataxia")), + new PPKtIndividualBase.TestIndividual("male 4 months, congenital onset", + male4monthsCongenitalOnset(), new PPKtIndividualBase.TestOutcome.Ok("Postaxial polydactyly")) + ); + } + + @ParameterizedTest + @MethodSource("testGetIndividualPhenotypicFeatures") + void testEvaluateExpression(PPKtIndividualBase.TestIndividual testCase) { + PPKtIndividualInfoGenerator generator = new PpktIndividualEnglish(); + EnglishPromptGenerator gen = new EnglishPromptGenerator(); + PpktIndividual ppkti = testCase.ppktIndividual(); + switch (testCase.expectedOutcome()) { + case PPKtIndividualBase.TestOutcome.Ok(String expectedResult) -> + assertEquals(expectedResult, gen.formatFeatures(ppkti.getPhenotypicFeaturesAtOnset()), + "Incorrect evaluation for: " + testCase.description()); + case PPKtIndividualBase.TestOutcome.Error(Supplier exceptionSupplier) -> + assertThrows(exceptionSupplier.get().getClass(), + () -> generator.getIndividualDescription(ppkti), + "Incorrect error handling for: " + 
testCase.description()); + } + } + +} From 6d6239080b0cde7db85646e43f0317d164711e69 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Thu, 6 Jun 2024 15:38:18 +0200 Subject: [PATCH 33/39] more testing-spanish --- .../cmd/GbtTranslateBatchCommand.java | 2 +- .../cmd/GptTranslateCommand.java | 2 +- .../output/PromptGenerator.java | 4 +- .../impl/spanish/PpktIndividualSpanish.java | 18 +++--- .../impl/spanish/SpanishPromptGenerator.java | 6 +- .../output/PPKtIndividualBase.java | 16 +++++ .../german/GermanPromptGeneratorTest.java | 6 ++ .../spanish/PpktIndividualSpanishTest.java | 22 +++---- .../impl/spanish/SpanishPromptTest.java | 58 +++++++++++++++++++ 9 files changed, 105 insertions(+), 29 deletions(-) create mode 100644 src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptTest.java diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java index 438eb16..bb0fae6 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java @@ -69,7 +69,7 @@ public Integer call() throws Exception { // output all non-English languages here // SPANISH - PromptGenerator spanish = PromptGenerator.spanish(hpo, internationalMap.get("es")); + PromptGenerator spanish = PromptGenerator.spanish(internationalMap.get("es")); resetOutput("es"); outputPromptsInternational(ppktFiles, hpo, "es", spanish); diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java index 542d166..cb905c4 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java @@ 
-67,7 +67,7 @@ public Integer call() throws Exception { prompt = german.createPrompt(individual); } case "es" -> { - PromptGenerator spanish = PromptGenerator.spanish(hpo, internationalMap.get("es")); + PromptGenerator spanish = PromptGenerator.spanish(internationalMap.get("es")); prompt = spanish.createPrompt(individual); } case "nl" -> { diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java index a000fca..70b1379 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java @@ -34,9 +34,9 @@ static PromptGenerator english(){ return new EnglishPromptGenerator(); } - static PromptGenerator spanish(Ontology hpo, HpInternational international) { + static PromptGenerator spanish(HpInternational international) { PpktPhenotypicFeatureGenerator pfgen = new PpktPhenotypicfeatureSpanish(international); - return new SpanishPromptGenerator(hpo, pfgen); + return new SpanishPromptGenerator(pfgen); } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java index 383cec3..060eaa0 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java @@ -246,9 +246,9 @@ private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8 // if older if (y>17) { return switch (psex) { - case FEMALE -> String.format("mujer de %d años", y); - case MALE -> String.format("hombre de %d años", y); - default -> String.format("persona de %d años", y); + case FEMALE -> String.format("una mujer de %d años", y); + case MALE -> 
String.format("un hombre de %d años", y); + default -> String.format("una persona de %d años", y); }; } else if (y>9) { return switch (psex) { @@ -258,9 +258,9 @@ private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8 }; } else if (y>0) { return switch (psex) { - case FEMALE -> String.format("niña %s", iso8601ToYearMonth(iso8601Age)); - case MALE -> String.format("niño %s", iso8601ToYearMonth(iso8601Age)); - default -> String.format("niño %s", iso8601ToYearMonth(iso8601Age)); + case FEMALE -> String.format("una niña %s", iso8601ToYearMonth(iso8601Age)); + case MALE -> String.format("un niño %s", iso8601ToYearMonth(iso8601Age)); + default -> String.format("un niño %s", iso8601ToYearMonth(iso8601Age)); }; } else if (m>0 || d> 0) { return switch (psex) { @@ -419,9 +419,9 @@ private String ageNotAvailable(PhenopacketSex psex) { @Override public String heSheIndividual(PhenopacketSex psex) { return switch (psex) { - case FEMALE -> "el"; - case MALE -> "ella"; - default -> "la persona"; + case FEMALE -> "ella"; + case MALE -> "el"; + default -> "el individuo"; }; } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java index d5bfa13..168b825 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java @@ -11,9 +11,6 @@ public class SpanishPromptGenerator implements PromptGenerator { - private final Ontology hpo; - - private final PPKtIndividualInfoGenerator ppktAgeSexGenerator; private final PhenopacketTextGenerator ppktTextGenerator; @@ -22,8 +19,7 @@ public class SpanishPromptGenerator implements PromptGenerator { - public SpanishPromptGenerator(Ontology hpo, PpktPhenotypicFeatureGenerator pfgen) { - this.hpo = hpo; + 
public SpanishPromptGenerator(PpktPhenotypicFeatureGenerator pfgen) { ppktAgeSexGenerator = new PpktIndividualSpanish(); ppktTextGenerator = new PpktTextSpanish(); this.ppktPhenotypicFeatureGenerator = pfgen; diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java index a3e6c7a..8fb7ec3 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java @@ -105,6 +105,22 @@ public static PpktIndividual twoYears() { return new PpktIndividual(builder.build()); } + public static PpktIndividual PMID_9312167_A() { + PhenopacketBuilder builder = PhenopacketBuilder.create("PMID_9312167_A:I:2", metadata); + Disease d = DiseaseBuilder.builder("OMIM:179800", "Distal renal tubular acidosis 1").build(); + Individual subject = IndividualBuilder.builder("A:I:2").female().ageAtLastEncounter("P40Y").build(); + var pf1 = PhenotypicFeatureBuilder.builder("HP:0000121","Nephrocalcinosis").build(); + var pf2 = PhenotypicFeatureBuilder.builder("HP:0002900","Hypokalemia").build(); + var pf3 = PhenotypicFeatureBuilder.builder("HP:0032944","Alkaline urine").build(); + var pf4 = PhenotypicFeatureBuilder.builder("HP:0012100","Abnormal circulating creatinine concentration").excluded().build(); + var pf5 = PhenotypicFeatureBuilder.builder("HP:0008341","Distal renal tubular acidosis").excluded().build(); + var lst = List.of(pf1, pf2, pf3, pf4, pf5); + builder.individual(subject).addDisease(d).addPhenotypicFeatures(lst); + return new PpktIndividual(builder.build()); + } + + + public static PhenopacketAge congenital = HpoOnsetAge.congenital(); diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java 
b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java index d142b28..67f5fce 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java @@ -1,5 +1,6 @@ package org.monarchinitiative.phenopacket2prompt.output.impl.german; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenol.io.OntologyLoader; @@ -15,6 +16,11 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase.twoYears; + +/** + * Test only works with local hpo-international.obo + */ +@Disabled public class GermanPromptGeneratorTest { private final static String case_vignette = """ diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java index 3fb3c15..ea73f37 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java @@ -21,15 +21,15 @@ public class PpktIndividualSpanishTest extends PPKtIndividualBase{ private static Stream testGetIndividualDescription() { return Stream.of( new TestIndividual("46 year olf female, infantile onset", - female46yearsInfantileOnset(), new TestOutcome.Ok("La paciente era mujer de 46 años que se presentaba en el primer año de vida con")), + female46yearsInfantileOnset(), new TestOutcome.Ok("La paciente era una mujer de 46 años que se presentó en el primer año de vida con")), new TestIndividual("male 4 months, congenital 
onset", - male4monthsCongenitalOnset(), new TestOutcome.Ok("El paciente era un bebé de 4 meses que se presentaba al nacer con")), + male4monthsCongenitalOnset(), new TestOutcome.Ok("El paciente era un bebé de 4 meses que se presentó al nacer con")), new TestIndividual("female, no onset", - femaleNoAge(), new TestOutcome.Ok("La paciente se presentaba con")), + femaleNoAge(), new TestOutcome.Ok("La paciente se presentó con")), new TestIndividual("female, no HPOs", femaleNoHPOs(), new TestOutcome.Error(() -> new PhenolRuntimeException("No HPO annotations"))), new TestIndividual("unknown sex, no 4mo", - unknownSex4MonthOnset(), new TestOutcome.Ok("El paciente se presentaba en la niñez con")) + unknownSex4MonthOnset(), new TestOutcome.Ok("El paciente se presentó en la niñez con")) ); } @@ -56,11 +56,11 @@ void testEvaluateExpression(TestIndividual testCase) { private static Stream testGetPPKtSex() { return Stream.of( new TestIdvlHeShe("female", - PhenopacketSex.FEMALE, new TestOutcome.Ok("she")), + PhenopacketSex.FEMALE, new TestOutcome.Ok("ella")), new TestIdvlHeShe("male", - PhenopacketSex.MALE, new TestOutcome.Ok("he")), + PhenopacketSex.MALE, new TestOutcome.Ok("el")), new TestIdvlHeShe("proband", - PhenopacketSex.UNKNOWN, new TestOutcome.Ok("the individual")) + PhenopacketSex.UNKNOWN, new TestOutcome.Ok("el individuo")) ); } @@ -84,13 +84,13 @@ void testPPKtSex(TestIdvlHeShe testCase) { private static Stream testIndlAtAge() { return Stream.of( new TestIdvlAtAge("congenital", - congenital, new TestOutcome.Ok("At birth")), + congenital, new TestOutcome.Ok("Al nacer")), new TestIdvlAtAge("infantile", - infantile, new TestOutcome.Ok("During the infantile period")), + infantile, new TestOutcome.Ok("Durante el período infantil")), new TestIdvlAtAge("childhood age", - childhood, new TestOutcome.Ok("During childhood")), + childhood, new TestOutcome.Ok("Durante la infancia")), new TestIdvlAtAge("46 years old", - p46y, new TestOutcome.Ok("At an age of 46 years")) + p46y, new 
TestOutcome.Ok("A la edad de 46 años")) ); } diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptTest.java new file mode 100644 index 0000000..7e3b344 --- /dev/null +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptTest.java @@ -0,0 +1,58 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.spanish; + +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.monarchinitiative.phenol.io.OntologyLoader; +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenopacket2prompt.international.HpInternational; +import org.monarchinitiative.phenopacket2prompt.international.HpInternationalOboParser; +import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator; + +import java.io.File; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase.PMID_9312167_A; +import static org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase.twoYears; + +/** + * Test only works with local hpo-international.obo + */ +@Disabled +public class SpanishPromptTest { + + private static final String case_vignette = """ +Estoy realizando un experimento con el informe de un caso clínico para comparar sus diagnósticos con los de expertos humanos. Les voy a dar parte de un caso médico. No estás intentando tratar a ningún paciente. En este caso, usted es el “Dr. GPT-4”, un modelo de lenguaje de IA que proporciona un diagnóstico. Aquí hay algunas pautas. En primer lugar, existe un único diagnóstico definitivo, y es un diagnóstico que hoy se sabe que existe en humanos. 
El diagnóstico casi siempre se confirma mediante algún tipo de prueba genética, aunque en casos raros cuando no existe dicha prueba para un diagnóstico, el diagnóstico puede realizarse utilizando criterios clínicos validados o, muy raramente, simplemente confirmado por la opinión de un experto. Después de leer el caso, quiero que haga un diagnóstico diferencial con una lista de diagnósticos candidatos clasificados por probabilidad comenzando con el candidato más probable. Cada candidato debe especificarse con el nombre de la enfermedad. Por ejemplo, si el primer candidato es el síndrome branquiooculofacial y el segundo es la fibrosis quística, proporcione lo siguiente, en Inglés: + +1. Branchiooculofacial syndrome +2. Cystic fibrosis + +Esta lista debe proporcionar tantos diagnósticos como considere razonables. + +No es necesario que explique su razonamiento, simplemente enumere los diagnósticos. +Te estoy dando estas instrucciones en Español pero quiero que proveas todas tus respuestas en Inglés. +Este es el caso: + +El paciente era un niño de 2 años que se presentó a la edad de 3 dias con Linfopenia, Neumonía y Déficit de IgA. A la edad de 2 años, el presentó Déficit de IgM."""; + + @Test + public void test() { + var ppktI = PMID_9312167_A(); + File hpJsonFile = new File("data/hp.json"); + if (! hpJsonFile.isFile()) { + throw new PhenolRuntimeException("Could not find hp.json at " + hpJsonFile.getAbsolutePath()); + } + Ontology hpo = OntologyLoader.loadOntology(hpJsonFile); + File translationsFile = new File("data/hp-international.obo"); + if (! translationsFile.isFile()) { + System.err.printf("Could not find translations file at %s. 
Try download command", translationsFile.getAbsolutePath()); + return ; + } + HpInternationalOboParser oboParser = new HpInternationalOboParser(translationsFile); + Map internationalMap = oboParser.getLanguageToInternationalMap(); + PromptGenerator german = PromptGenerator.spanish(internationalMap.get("es")); + String prompt = german.createPrompt(twoYears()); + assertEquals(case_vignette, prompt.trim()); + } +} From e57ef349d65c13e798ff0d0eb1c1d4d6cf3985c6 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Thu, 6 Jun 2024 19:10:19 +0200 Subject: [PATCH 34/39] more testing-spanish --- .../cmd/GbtTranslateBatchCommand.java | 3 ++ .../spanish/PpktPhenotypicfeatureSpanish.java | 35 +++++++++++++------ .../output/PPKtIndividualBase.java | 18 +++++++++- .../impl/spanish/SpanishPromptTest.java | 25 +++++++++++-- 4 files changed, 68 insertions(+), 13 deletions(-) diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java index bb0fae6..596c7ce 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java @@ -216,6 +216,9 @@ private List getAllPhenopacketJsonFiles() { for (File item : items) { if (item.isDirectory()) ppktDirectories.add(ppktDir+item.getName()); + else if (item.isFile() && item.getName().endsWith(".json")) { + ppktFiles.add(item); + } } for (var f: ppktDirectories) { File subdir = new File(f); diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java index 6e59f52..7876582 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java +++ 
b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java @@ -36,6 +36,23 @@ private List getTranslations(List ontologyTerms) { private final Set vowels = Set.of('A', 'E', 'I', 'O', 'U', 'Y'); + String getConnector(String nextWord) { + if (nextWord.length() < 2) { + return "y"; // should never happen but do not want to crash + } + Character letter = nextWord.charAt(0); + if (vowels.contains(letter)) { + return " i "; + } + Character letter2 = nextWord.charAt(1); + if (letter == 'H' && vowels.contains(letter2)) { + return " i "; + } + return " y "; + + } + + private String getOxfordCommaList(List items) { if (items.size() == 1) { return items.getFirst(); @@ -43,17 +60,15 @@ private String getOxfordCommaList(List items) { if (items.size() == 2) { // no comma if we just have two items. // one item will work with the below code - return String.join(" and ", items); + String connector = getConnector(items.get(1)); + return String.join(connector, items); } String symList = String.join(", ", items); int jj = symList.lastIndexOf(", "); if (jj > 0) { String end = symList.substring(jj+2); - if (vowels.contains(end.charAt(0))) { - symList = symList.substring(0, jj) + " i " + end; - } else { - symList = symList.substring(0, jj) + " y " + end; - } + String connector = getConnector(end); + symList = symList.substring(0, jj) + connector + end; } return symList; } @@ -73,16 +88,16 @@ public String formatFeatures(List ontologyTerms) { return getOxfordCommaList(observedLabels) + ". 
"; } else if (observedLabels.isEmpty()) { if (excludedLabels.size() > 1) { - return String.format("por lo que se excluyeron %s.", getOxfordCommaList(excludedLabels)); + return String.format("se descartaron %s.", getOxfordCommaList(excludedLabels)); } else { - return String.format("por lo que %s fue excluido.",excludedLabels.getFirst()); + return String.format("se descartó %s.",excludedLabels.getFirst()); } } else { String exclusion; if (excludedLabels.size() == 1) { - exclusion = String.format(" y se excluyó %s.", getOxfordCommaList(excludedLabels)); + exclusion = String.format(" En cambio, se descartó %s.", getOxfordCommaList(excludedLabels)); } else { - exclusion = String.format(" y se excluyeron %s.", getOxfordCommaList(excludedLabels)); + exclusion = String.format(" En cambio, se descartaron %s.", getOxfordCommaList(excludedLabels)); } return getOxfordCommaList(observedLabels) + exclusion; } diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java index 8fb7ec3..f7c0458 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java @@ -119,7 +119,23 @@ public static PpktIndividual PMID_9312167_A() { return new PpktIndividual(builder.build()); } - + /* + El paciente era un hombre de 30 años qui se presentó con se descartaron Máculas hipomelanóticas, Rabdomioma cardíaco y Bradicardia.A la edad de 30 años, el presentó Fositas o muescas (pits) del esmalte dental y Fibromatosis gingival. 
+ */ + public static PpktIndividual onlyExcludedAtPresentation() { + PhenopacketBuilder builder = PhenopacketBuilder.create("id8", metadata); + Disease d = DiseaseBuilder.builder("OMIM:100123", "test").onset(TimeElements.age("P3D")).build(); + Individual subject = IndividualBuilder.builder("individual.6").male().ageAtLastEncounter("P30Y").build(); + // HP:Gingival fibromatosis HP: + var pf1 = PhenotypicFeatureBuilder.builder("HP:0001662","Bradycardia").excluded().build(); + var pf2 = PhenotypicFeatureBuilder.builder("HP:0009729","Cardiac rhabdomyoma").excluded().build(); + var pf3 = PhenotypicFeatureBuilder.builder("HP:0009719","Hypomelanotic macule").excluded().build(); + var pf4 =PhenotypicFeatureBuilder.builder("HP:0009722","Dental enamel pits").onset(TimeElements.age("P30Y")).build(); + var pf5 =PhenotypicFeatureBuilder.builder("HP:0000169","Gingival fibromatosis").onset(TimeElements.age("P30Y")).build(); + var features = List.of(pf1,pf2,pf3,pf4,pf5); + builder.individual(subject).addDisease(d).addPhenotypicFeatures(features); + return new PpktIndividual(builder.build()); + } diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptTest.java index 7e3b344..0362553 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptTest.java @@ -13,8 +13,7 @@ import java.util.Map; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase.PMID_9312167_A; -import static org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase.twoYears; +import static org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase.*; /** * Test only works with local hpo-international.obo @@ -55,4 +54,26 @@ 
public void test() { String prompt = german.createPrompt(twoYears()); assertEquals(case_vignette, prompt.trim()); } + + + @Test + public void testNoObservedAtOnset() { + File hpJsonFile = new File("data/hp.json"); + if (! hpJsonFile.isFile()) { + throw new PhenolRuntimeException("Could not find hp.json at " + hpJsonFile.getAbsolutePath()); + } + Ontology hpo = OntologyLoader.loadOntology(hpJsonFile); + File translationsFile = new File("data/hp-international.obo"); + if (! translationsFile.isFile()) { + System.err.printf("Could not find translations file at %s. Try download command", translationsFile.getAbsolutePath()); + return ; + } + HpInternationalOboParser oboParser = new HpInternationalOboParser(translationsFile); + Map internationalMap = oboParser.getLanguageToInternationalMap(); + PromptGenerator spanish = PromptGenerator.spanish(internationalMap.get("es")); + String prompt = spanish.createPrompt(onlyExcludedAtPresentation()); + assertEquals(case_vignette, prompt.trim()); + } + + } From 5759ef96342dbf55cc3ab56177416aa6c09c44cb Mon Sep 17 00:00:00 2001 From: Leonardo macOS Date: Fri, 7 Jun 2024 12:02:47 +0200 Subject: [PATCH 35/39] setup italian testing and fixed minor bugs, but careful with merging into develop --- .../cmd/GbtTranslateBatchCommand.java | 4 +- .../cmd/GptTranslateCommand.java | 6 ++- .../output/PromptGenerator.java | 8 +-- .../impl/german/GermanPromptGenerator.java | 6 ++- .../impl/italian/ItalianPromptGenerator.java | 5 +- .../impl/italian/PpktIndividualItalian.java | 33 ++++++------ .../PpktPhenotypicFeatureItalianTest.java | 50 ------------------- 7 files changed, 38 insertions(+), 74 deletions(-) delete mode 100644 src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktPhenotypicFeatureItalianTest.java diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java index 438eb16..61604fd 
100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java @@ -78,12 +78,12 @@ public Integer call() throws Exception { outputPromptsInternational(ppktFiles, hpo, "nl", dutch); // GERMAN resetOutput("de"); - PromptGenerator german = PromptGenerator.german(internationalMap.get("de")); + PromptGenerator german = PromptGenerator.german(hpo, internationalMap.get("de")); outputPromptsInternational(ppktFiles, hpo, "de", german); // ITALIAN resetOutput("it"); - PromptGenerator italian = PromptGenerator.italian(internationalMap.get("it")); + PromptGenerator italian = PromptGenerator.italian(hpo, internationalMap.get("it")); outputPromptsInternational(ppktFiles, hpo, "it", italian); resetOutput("finished"); diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java index 542d166..d147ef1 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java @@ -63,7 +63,7 @@ public Integer call() throws Exception { System.out.println(prompt); switch (languageCode) { case "de" -> { - PromptGenerator german = PromptGenerator.german(internationalMap.get("de")); + PromptGenerator german = PromptGenerator.german(hpo, internationalMap.get("de")); prompt = german.createPrompt(individual); } case "es" -> { @@ -74,6 +74,10 @@ public Integer call() throws Exception { PromptGenerator dutch = PromptGenerator.dutch(hpo, internationalMap.get("nl")); prompt = dutch.createPrompt(individual); } + case "it" -> { + PromptGenerator italian = PromptGenerator.italian(hpo, internationalMap.get("it")); + prompt = italian.createPrompt(individual); + } default -> prompt = "did not recognize language code " + languageCode; } diff 
--git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java index a000fca..cc42783 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java @@ -45,13 +45,13 @@ static PromptGenerator dutch(Ontology hpo, HpInternational international) { return new DutchPromptGenerator(hpo, pfgen); } - static PromptGenerator german(HpInternational international) { + static PromptGenerator german(Ontology hpo, HpInternational international) { PpktPhenotypicFeatureGenerator pfgen = new PpktPhenotypicfeatureGerman(international); - return new GermanPromptGenerator(pfgen); + return new GermanPromptGenerator(hpo, pfgen); } - static PromptGenerator italian(HpInternational international) { + static PromptGenerator italian(Ontology hpo, HpInternational international) { PpktPhenotypicFeatureGenerator pfgen = new PpktPhenotypicfeatureItalian(international); - return new ItalianPromptGenerator(pfgen); + return new ItalianPromptGenerator(hpo, pfgen); } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java index d80869f..f52770c 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java @@ -1,5 +1,6 @@ package org.monarchinitiative.phenopacket2prompt.output.impl.german; +import org.monarchinitiative.phenol.ontology.data.Ontology; import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; @@ 
-14,6 +15,8 @@ public class GermanPromptGenerator implements PromptGenerator { + private final Ontology hpo; + private final PPKtIndividualInfoGenerator ppktAgeSexGenerator; private final PhenopacketTextGenerator ppktTextGenerator; @@ -22,7 +25,8 @@ public class GermanPromptGenerator implements PromptGenerator { - public GermanPromptGenerator(PpktPhenotypicFeatureGenerator pfgen) { + public GermanPromptGenerator(Ontology hpo,PpktPhenotypicFeatureGenerator pfgen) { + this.hpo = hpo; ppktAgeSexGenerator = new PpktIndividualGerman(); ppktTextGenerator = new PpktTextGerman(); this.ppktPhenotypicFeatureGenerator = pfgen; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGenerator.java index 6016570..f2c6bef 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGenerator.java @@ -15,6 +15,8 @@ public class ItalianPromptGenerator implements PromptGenerator { + private final Ontology hpo; + private final PPKtIndividualInfoGenerator ppktAgeSexGenerator; private final PhenopacketTextGenerator ppktTextGenerator; @@ -23,7 +25,8 @@ public class ItalianPromptGenerator implements PromptGenerator { - public ItalianPromptGenerator(PpktPhenotypicFeatureGenerator pfgen) { + public ItalianPromptGenerator(Ontology hpo, PpktPhenotypicFeatureGenerator pfgen) { + this.hpo = hpo; ppktAgeSexGenerator = new PpktIndividualItalian(); ppktTextGenerator = new PpktTextItalian(); this.ppktPhenotypicFeatureGenerator = pfgen; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalian.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalian.java index c06950a..c0868b4 100644 --- 
a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalian.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalian.java @@ -75,7 +75,7 @@ public String ageAndSexAtLastExamination(PpktIndividual individual) { if (y > 17) { return String.format("una donna di %d anni", y); } else if (y > 9) { - return String.format("una adolescente di %d anni", y); + return String.format("un'adolescente femmina di %d anni", y); } else if (y > 0) { return String.format("una bambina di %d anni", y); @@ -148,6 +148,9 @@ private String atIsoAgeExact(PhenopacketAge ppktAge) { @Override public String getIndividualDescription(PpktIndividual individual) { + if (individual.annotationCount() == 0) { + throw new PhenolRuntimeException("No HPO annotations"); + } Optional lastExamOpt = individual.getAgeAtLastExamination(); Optional onsetOpt = individual.getAgeAtOnset(); PhenopacketSex psex = individual.getSex(); @@ -223,7 +226,7 @@ private String onsetTermAtAgeOf(HpoOnsetAge hpoOnsetTermAge) { if (hpoOnsetTermAge.isFetus()) { return "nel periodo fetale"; } else if (hpoOnsetTermAge.isCongenital()) { - return "nel periodo neonatale"; + return "alla nascita"; } else if (hpoOnsetTermAge.isInfant()) { return "nel periodo infantile"; // unsure, to be checked } else if (hpoOnsetTermAge.isChild()) { @@ -249,8 +252,8 @@ private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8 }; } else if (y>9) { return switch (psex) { - case FEMALE -> String.format("un'adolescente di %d anni", y); - case MALE -> String.format("un adolescente di %d anni", y); + case FEMALE -> String.format("un'adolescente femmina di %d anni", y); + case MALE -> String.format("un adolescente maschio di %d anni", y); default -> String.format("un adolescente di %d anni", y); }; } else if (y>0) { @@ -261,9 +264,9 @@ private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8 }; } else if (m>0 || d> 0) { 
return switch (psex) { - case FEMALE -> String.format("una infante %s", iso8601ToMonthDay(iso8601Age)); - case MALE -> String.format("un infante %s", iso8601ToMonthDay(iso8601Age)); - default -> String.format("un infante %s", iso8601ToMonthDay(iso8601Age)); + case FEMALE -> String.format("%s %s", FEMALE_INFANT, iso8601ToMonthDay(iso8601Age)); + case MALE -> String.format("%s %s", MALE_INFANT, iso8601ToMonthDay(iso8601Age)); + default -> String.format("%s %s", INFANT, iso8601ToMonthDay(iso8601Age)); }; } else { return switch (psex) { @@ -368,7 +371,7 @@ private String lastEncounterAvailable(PhenopacketSex psex, PhenopacketAge lastEx // should never happen throw new PhenolRuntimeException("Did not recognize last exam age type " + lastExamAge.ageType()); } - return String.format("Il paziente era %s che si è presentato ", individualDescription); + return String.format("Il soggetto era %s che si è presentato ", individualDescription); } /** @@ -390,23 +393,23 @@ private String onsetAvailable(PhenopacketSex psex, PhenopacketAge onsetAge) { // should never happen throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); } - return String.format("Il paziente si è presentato %s con", onsetDescription); + return String.format("Il soggetto si è presentato %s con", onsetDescription); } private String ageNotAvailable(PhenopacketSex psex) { return switch (psex) { - case FEMALE -> "La paziente si è presentata con"; - case MALE -> "Il paziente si è presentato con"; - default -> "Il paziente si è presentato con"; + case FEMALE -> "Il soggetto era una femmina che si è presentata con"; + case MALE -> "Il soggetto era un maschio si è presentato con"; + default -> "Il soggetto si è presentato con"; }; } @Override public String heSheIndividual(PhenopacketSex psex) { return switch (psex) { - case FEMALE -> "lui"; - case MALE -> "lei"; - default -> "la persona"; + case FEMALE -> "lei"; + case MALE -> "lui"; + default -> "il soggetto"; }; } diff --git 
a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktPhenotypicFeatureItalianTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktPhenotypicFeatureItalianTest.java deleted file mode 100644 index 8f20d42..0000000 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktPhenotypicFeatureItalianTest.java +++ /dev/null @@ -1,50 +0,0 @@ -package org.monarchinitiative.phenopacket2prompt.output.impl.italian; - -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.MethodSource; -import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; -import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase; -import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; -import org.monarchinitiative.phenopacket2prompt.output.impl.english.EnglishPromptGenerator; -import org.monarchinitiative.phenopacket2prompt.output.impl.english.PpktIndividualEnglish; - -import java.util.function.Supplier; -import java.util.stream.Stream; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase.female46yearsInfantileOnset; -import static org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase.male4monthsCongenitalOnset; - -public class PpktPhenotypicFeatureItalianTest { - - - - - private static Stream testGetIndividualPhenotypicFeatures() { - return Stream.of( - new PPKtIndividualBase.TestIndividual("46 year olf female, infantile onset", - female46yearsInfantileOnset(), new PPKtIndividualBase.TestOutcome.Ok("Cerebellar atrophy and Ataxia")), - new PPKtIndividualBase.TestIndividual("male 4 months, congenital onset", - male4monthsCongenitalOnset(), new PPKtIndividualBase.TestOutcome.Ok("Postaxial polydactyly")) - ); - } - - @ParameterizedTest - 
@MethodSource("testGetIndividualPhenotypicFeatures") - void testEvaluateExpression(PPKtIndividualBase.TestIndividual testCase) { - PPKtIndividualInfoGenerator generator = new PpktIndividualEnglish(); - EnglishPromptGenerator gen = new EnglishPromptGenerator(); - PpktIndividual ppkti = testCase.ppktIndividual(); - switch (testCase.expectedOutcome()) { - case PPKtIndividualBase.TestOutcome.Ok(String expectedResult) -> - assertEquals(expectedResult, gen.formatFeatures(ppkti.getPhenotypicFeaturesAtOnset()), - "Incorrect evaluation for: " + testCase.description()); - case PPKtIndividualBase.TestOutcome.Error(Supplier exceptionSupplier) -> - assertThrows(exceptionSupplier.get().getClass(), - () -> generator.getIndividualDescription(ppkti), - "Incorrect error handling for: " + testCase.description()); - } - } - -} From ab445177a59eced816901983b22167e13bc6589e Mon Sep 17 00:00:00 2001 From: Leonardo macOS Date: Fri, 7 Jun 2024 15:35:31 +0200 Subject: [PATCH 36/39] fixing several tests --- .../output/PPKtIndividualBase.java | 2 +- .../english/PpktIndividualEnglishTest.java | 4 +-- .../german/GermanPromptGeneratorTest.java | 2 +- .../impl/german/PpktIndividualGermanTest.java | 2 +- .../italian/PpktIndividualItalianTest.java | 33 +++++++++---------- .../spanish/PpktIndividualSpanishTest.java | 2 +- 6 files changed, 22 insertions(+), 23 deletions(-) diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java index a3e6c7a..28a4177 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java @@ -80,7 +80,7 @@ public static PpktIndividual femaleNoHPOs() { return new PpktIndividual(builder.build()); } - public static PpktIndividual unknownSex4MonthOnset() { + public static PpktIndividual unknownSex4YearsOnset() { 
PhenopacketBuilder builder = PhenopacketBuilder.create("id5", metadata); Disease d = DiseaseBuilder.builder("OMIM:100123", "test").onset(TimeElements.childhoodOnset()).build(); Individual subject = IndividualBuilder.builder("individual.5").unknownSex().build(); diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java index 9992eb1..533d9d0 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java @@ -28,8 +28,8 @@ private static Stream testGetIndividualDescription() { femaleNoAge(), new TestOutcome.Ok("The proband was a female who presented with")), new TestIndividual("female, no HPOs", femaleNoHPOs(), new TestOutcome.Error(() -> new PhenolRuntimeException("No HPO annotations"))), - new TestIndividual("unknown sex, no 4mo", - unknownSex4MonthOnset(), new TestOutcome.Ok("The proband presented in childhood with")) + new TestIndividual("unknown sex, no 4yo", + unknownSex4YearsOnset(), new TestOutcome.Ok("The proband presented in childhood with")) ); } diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java index d142b28..be71e8b 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java @@ -45,7 +45,7 @@ public void testCase() { } HpInternationalOboParser oboParser = new HpInternationalOboParser(translationsFile); Map internationalMap = oboParser.getLanguageToInternationalMap(); - PromptGenerator 
german = PromptGenerator.german(internationalMap.get("de")); + PromptGenerator german = PromptGenerator.german(hpo, internationalMap.get("de")); String prompt = german.createPrompt(twoYears()); assertEquals(case_vignette, prompt.trim()); } diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGermanTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGermanTest.java index 3c592d8..ae6cbde 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGermanTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGermanTest.java @@ -29,7 +29,7 @@ private static Stream testGetIndividualDescription() { new TestIndividual("female, no HPOs", femaleNoHPOs(), new TestOutcome.Error(() -> new PhenolRuntimeException("No HPO annotations"))), new TestIndividual("unknown sex, no 4mo", - unknownSex4MonthOnset(), new TestOutcome.Ok("Der Patient stellte sich in der Kindheit mit den folgenden Symptomen vor: ")) + unknownSex4YearsOnset(), new TestOutcome.Ok("Der Patient stellte sich in der Kindheit mit den folgenden Symptomen vor: ")) ); } diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalianTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalianTest.java index 295ca60..334ce96 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalianTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalianTest.java @@ -7,7 +7,6 @@ import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase; import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; -import 
org.monarchinitiative.phenopacket2prompt.output.impl.english.PpktIndividualEnglish; import java.util.function.Supplier; import java.util.stream.Stream; @@ -21,16 +20,16 @@ public class PpktIndividualItalianTest extends PPKtIndividualBase{ private static Stream testGetIndividualDescription() { return Stream.of( - new TestIndividual("46 year olf female, infantile onset", - female46yearsInfantileOnset(), new TestOutcome.Ok("The proband was a 46-year old woman who presented as an infant with")), + new TestIndividual("46 year old female, infantile onset", + female46yearsInfantileOnset(), new TestOutcome.Ok("Il soggetto era una donna di 46 anni che si è presentato nel periodo infantile con")), new TestIndividual("male 4 months, congenital onset", - male4monthsCongenitalOnset(), new TestOutcome.Ok("The proband was a 4-month old male infant who presented at birth with")), + male4monthsCongenitalOnset(), new TestOutcome.Ok("Il soggetto era un infante maschio di 4 mesi che si è presentato alla nascita con")), new TestIndividual("female, no onset", - femaleNoAge(), new TestOutcome.Ok("The proband was a female who presented with")), + femaleNoAge(), new TestOutcome.Ok("Il soggetto era una femmina che si è presentata con")), new TestIndividual("female, no HPOs", - femaleNoHPOs(), new TestOutcome.Error(() -> new PhenolRuntimeException("No HPO annotations"))), + femaleNoHPOs(), new TestOutcome.Error(() -> new PhenolRuntimeException("Nessuna anomalia fenotipica"))), new TestIndividual("unknown sex, no 4mo", - unknownSex4MonthOnset(), new TestOutcome.Ok("The proband presented in childhood with")) + unknownSex4YearsOnset(), new TestOutcome.Ok("Il soggetto si è presentato da bambino con")) ); } @@ -39,7 +38,7 @@ private static Stream testGetIndividualDescription() { @ParameterizedTest @MethodSource("testGetIndividualDescription") void testEvaluateExpression(TestIndividual testCase) { - PPKtIndividualInfoGenerator generator = new PpktIndividualEnglish(); + PPKtIndividualInfoGenerator 
generator = new PpktIndividualItalian(); PpktIndividual ppkti = testCase.ppktIndividual(); switch (testCase.expectedOutcome()) { case TestOutcome.Ok(String expectedResult) -> @@ -57,18 +56,18 @@ void testEvaluateExpression(TestIndividual testCase) { private static Stream testGetPPKtSex() { return Stream.of( new TestIdvlHeShe("female", - PhenopacketSex.FEMALE, new TestOutcome.Ok("she")), + PhenopacketSex.FEMALE, new TestOutcome.Ok("lei")), new TestIdvlHeShe("male", - PhenopacketSex.MALE, new TestOutcome.Ok("he")), + PhenopacketSex.MALE, new TestOutcome.Ok("lui")), new TestIdvlHeShe("proband", - PhenopacketSex.UNKNOWN, new TestOutcome.Ok("the individual")) + PhenopacketSex.UNKNOWN, new TestOutcome.Ok("il soggetto")) ); } @ParameterizedTest @MethodSource("testGetPPKtSex") void testPPKtSex(TestIdvlHeShe testCase) { - PPKtIndividualInfoGenerator generator = new PpktIndividualEnglish(); + PPKtIndividualInfoGenerator generator = new PpktIndividualItalian(); switch (testCase.expectedOutcome()) { case TestOutcome.Ok(String expectedResult) -> assertEquals(expectedResult, generator.heSheIndividual(testCase.ppktSex())); @@ -89,13 +88,13 @@ void testPPKtSex(TestIdvlHeShe testCase) { private static Stream testIndlAtAge() { return Stream.of( new TestIdvlAtAge("congenital", - congenital, new TestOutcome.Ok("At birth")), + congenital, new TestOutcome.Ok("Alla nascita")), new TestIdvlAtAge("infantile", - infantile, new TestOutcome.Ok("During the infantile period")), + infantile, new TestOutcome.Ok("Durante il periodo infantile")), new TestIdvlAtAge("childhood age", - childhood, new TestOutcome.Ok("During childhood")), + childhood, new TestOutcome.Ok("Durante l'infanzia")), new TestIdvlAtAge("46 years old", - p46y, new TestOutcome.Ok("At an age of 46 years")) + p46y, new TestOutcome.Ok("All'età di 46 anni")) ); } @@ -103,7 +102,7 @@ private static Stream testIndlAtAge() { @ParameterizedTest @MethodSource("testIndlAtAge") void testPPKtSex(TestIdvlAtAge testCase) { - 
PPKtIndividualInfoGenerator generator = new PpktIndividualEnglish(); + PPKtIndividualInfoGenerator generator = new PpktIndividualItalian(); switch (testCase.expectedOutcome()) { case TestOutcome.Ok(String expectedResult) -> assertEquals(expectedResult, generator.atAge(testCase.ppktAge())); diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java index 3017a6e..5d88626 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java @@ -29,7 +29,7 @@ private static Stream testGetIndividualDescription() { new TestIndividual("female, no HPOs", femaleNoHPOs(), new TestOutcome.Error(() -> new PhenolRuntimeException("No HPO annotations"))), new TestIndividual("unknown sex, no 4mo", - unknownSex4MonthOnset(), new TestOutcome.Ok("El paciente se presentó en la niñez con")) + unknownSex4YearsOnset(), new TestOutcome.Ok("El paciente se presentó en la niñez con")) ); } From 30c3a0f89aa9f758a36d8c5ccd312d392d7a12c6 Mon Sep 17 00:00:00 2001 From: Leonardo macOS Date: Fri, 7 Jun 2024 16:39:52 +0200 Subject: [PATCH 37/39] got rid of hpo variables we do not want --- .../cmd/GbtTranslateBatchCommand.java | 8 ++++---- .../cmd/GptTranslateCommand.java | 8 ++++---- .../output/PromptGenerator.java | 17 ++++++++--------- .../output/impl/dutch/DutchPromptGenerator.java | 2 +- .../impl/german/GermanPromptGenerator.java | 5 +---- .../impl/italian/ItalianPromptGenerator.java | 5 +---- .../impl/spanish/SpanishPromptGenerator.java | 5 +---- .../impl/german/GermanPromptGeneratorTest.java | 2 +- .../impl/german/PpktIndividualGermanTest.java | 2 +- .../impl/italian/PpktIndividualItalianTest.java | 2 +- .../impl/spanish/PpktIndividualSpanishTest.java | 2 +- 11 files 
changed, 24 insertions(+), 34 deletions(-) diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java index 61604fd..1767cba 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java @@ -69,21 +69,21 @@ public Integer call() throws Exception { // output all non-English languages here // SPANISH - PromptGenerator spanish = PromptGenerator.spanish(hpo, internationalMap.get("es")); + PromptGenerator spanish = PromptGenerator.spanish(internationalMap.get("es")); resetOutput("es"); outputPromptsInternational(ppktFiles, hpo, "es", spanish); resetOutput("nl"); - PromptGenerator dutch = PromptGenerator.dutch(hpo, internationalMap.get("nl")); + PromptGenerator dutch = PromptGenerator.dutch(internationalMap.get("nl")); outputPromptsInternational(ppktFiles, hpo, "nl", dutch); // GERMAN resetOutput("de"); - PromptGenerator german = PromptGenerator.german(hpo, internationalMap.get("de")); + PromptGenerator german = PromptGenerator.german(internationalMap.get("de")); outputPromptsInternational(ppktFiles, hpo, "de", german); // ITALIAN resetOutput("it"); - PromptGenerator italian = PromptGenerator.italian(hpo, internationalMap.get("it")); + PromptGenerator italian = PromptGenerator.italian(internationalMap.get("it")); outputPromptsInternational(ppktFiles, hpo, "it", italian); resetOutput("finished"); diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java index d147ef1..478bf60 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java @@ -63,19 +63,19 @@ public 
Integer call() throws Exception { System.out.println(prompt); switch (languageCode) { case "de" -> { - PromptGenerator german = PromptGenerator.german(hpo, internationalMap.get("de")); + PromptGenerator german = PromptGenerator.german(internationalMap.get("de")); prompt = german.createPrompt(individual); } case "es" -> { - PromptGenerator spanish = PromptGenerator.spanish(hpo, internationalMap.get("es")); + PromptGenerator spanish = PromptGenerator.spanish(internationalMap.get("es")); prompt = spanish.createPrompt(individual); } case "nl" -> { - PromptGenerator dutch = PromptGenerator.dutch(hpo, internationalMap.get("nl")); + PromptGenerator dutch = PromptGenerator.dutch(internationalMap.get("nl")); prompt = dutch.createPrompt(individual); } case "it" -> { - PromptGenerator italian = PromptGenerator.italian(hpo, internationalMap.get("it")); + PromptGenerator italian = PromptGenerator.italian(internationalMap.get("it")); prompt = italian.createPrompt(individual); } default -> prompt = "did not recognize language code " + languageCode; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java index cc42783..7586ccd 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java @@ -1,6 +1,5 @@ package org.monarchinitiative.phenopacket2prompt.output; -import org.monarchinitiative.phenol.ontology.data.Ontology; import org.monarchinitiative.phenopacket2prompt.international.HpInternational; import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; @@ -34,24 +33,24 @@ static PromptGenerator english(){ return new EnglishPromptGenerator(); } - static PromptGenerator spanish(Ontology hpo, HpInternational international) { + static PromptGenerator 
spanish(HpInternational international) { PpktPhenotypicFeatureGenerator pfgen = new PpktPhenotypicfeatureSpanish(international); - return new SpanishPromptGenerator(hpo, pfgen); + return new SpanishPromptGenerator(pfgen); } - static PromptGenerator dutch(Ontology hpo, HpInternational international) { + static PromptGenerator dutch(HpInternational international) { PpktPhenotypicFeatureGenerator pfgen = new PpktPhenotypicfeatureDutch(international); - return new DutchPromptGenerator(hpo, pfgen); + return new DutchPromptGenerator(pfgen); } - static PromptGenerator german(Ontology hpo, HpInternational international) { + static PromptGenerator german(HpInternational international) { PpktPhenotypicFeatureGenerator pfgen = new PpktPhenotypicfeatureGerman(international); - return new GermanPromptGenerator(hpo, pfgen); + return new GermanPromptGenerator(pfgen); } - static PromptGenerator italian(Ontology hpo, HpInternational international) { + static PromptGenerator italian(HpInternational international) { PpktPhenotypicFeatureGenerator pfgen = new PpktPhenotypicfeatureItalian(international); - return new ItalianPromptGenerator(hpo, pfgen); + return new ItalianPromptGenerator(pfgen); } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java index a7ad7cc..6e89600 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java @@ -26,7 +26,7 @@ public class DutchPromptGenerator implements PromptGenerator { - public DutchPromptGenerator(Ontology hpo, PpktPhenotypicFeatureGenerator pfgen) { + public DutchPromptGenerator(PpktPhenotypicFeatureGenerator pfgen) { this.hpo = hpo; ppktAgeSexGenerator = new PpktIndividualDutch(); ppktTextGenerator = new PpktTextDutch(); diff 
--git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java index f52770c..25c2ace 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java @@ -15,8 +15,6 @@ public class GermanPromptGenerator implements PromptGenerator { - private final Ontology hpo; - private final PPKtIndividualInfoGenerator ppktAgeSexGenerator; private final PhenopacketTextGenerator ppktTextGenerator; @@ -25,8 +23,7 @@ public class GermanPromptGenerator implements PromptGenerator { - public GermanPromptGenerator(Ontology hpo,PpktPhenotypicFeatureGenerator pfgen) { - this.hpo = hpo; + public GermanPromptGenerator(PpktPhenotypicFeatureGenerator pfgen) { ppktAgeSexGenerator = new PpktIndividualGerman(); ppktTextGenerator = new PpktTextGerman(); this.ppktPhenotypicFeatureGenerator = pfgen; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGenerator.java index f2c6bef..6016570 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGenerator.java @@ -15,8 +15,6 @@ public class ItalianPromptGenerator implements PromptGenerator { - private final Ontology hpo; - private final PPKtIndividualInfoGenerator ppktAgeSexGenerator; private final PhenopacketTextGenerator ppktTextGenerator; @@ -25,8 +23,7 @@ public class ItalianPromptGenerator implements PromptGenerator { - public ItalianPromptGenerator(Ontology hpo, PpktPhenotypicFeatureGenerator pfgen) { - this.hpo = hpo; + public 
ItalianPromptGenerator(PpktPhenotypicFeatureGenerator pfgen) { ppktAgeSexGenerator = new PpktIndividualItalian(); ppktTextGenerator = new PpktTextItalian(); this.ppktPhenotypicFeatureGenerator = pfgen; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java index d5bfa13..39ddb33 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java @@ -11,8 +11,6 @@ public class SpanishPromptGenerator implements PromptGenerator { - private final Ontology hpo; - private final PPKtIndividualInfoGenerator ppktAgeSexGenerator; @@ -22,8 +20,7 @@ public class SpanishPromptGenerator implements PromptGenerator { - public SpanishPromptGenerator(Ontology hpo, PpktPhenotypicFeatureGenerator pfgen) { - this.hpo = hpo; + public SpanishPromptGenerator(PpktPhenotypicFeatureGenerator pfgen) { ppktAgeSexGenerator = new PpktIndividualSpanish(); ppktTextGenerator = new PpktTextSpanish(); this.ppktPhenotypicFeatureGenerator = pfgen; diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java index be71e8b..d142b28 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java @@ -45,7 +45,7 @@ public void testCase() { } HpInternationalOboParser oboParser = new HpInternationalOboParser(translationsFile); Map internationalMap = oboParser.getLanguageToInternationalMap(); - PromptGenerator german = PromptGenerator.german(hpo, 
internationalMap.get("de")); + PromptGenerator german = PromptGenerator.german(internationalMap.get("de")); String prompt = german.createPrompt(twoYears()); assertEquals(case_vignette, prompt.trim()); } diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGermanTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGermanTest.java index ae6cbde..25095d5 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGermanTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGermanTest.java @@ -28,7 +28,7 @@ private static Stream testGetIndividualDescription() { femaleNoAge(), new TestOutcome.Ok("Die Patientin stellte sich mit den folgenden Symptomen vor: ")), new TestIndividual("female, no HPOs", femaleNoHPOs(), new TestOutcome.Error(() -> new PhenolRuntimeException("No HPO annotations"))), - new TestIndividual("unknown sex, no 4mo", + new TestIndividual("unknown sex, no 4yo", unknownSex4YearsOnset(), new TestOutcome.Ok("Der Patient stellte sich in der Kindheit mit den folgenden Symptomen vor: ")) ); } diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalianTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalianTest.java index 334ce96..f501786 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalianTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalianTest.java @@ -28,7 +28,7 @@ private static Stream testGetIndividualDescription() { femaleNoAge(), new TestOutcome.Ok("Il soggetto era una femmina che si è presentata con")), new TestIndividual("female, no HPOs", femaleNoHPOs(), new TestOutcome.Error(() -> new PhenolRuntimeException("Nessuna anomalia fenotipica"))), - new 
TestIndividual("unknown sex, no 4mo", + new TestIndividual("unknown sex, no 4yo", unknownSex4YearsOnset(), new TestOutcome.Ok("Il soggetto si è presentato da bambino con")) ); } diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java index 5d88626..a1fe1f4 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java @@ -28,7 +28,7 @@ private static Stream testGetIndividualDescription() { femaleNoAge(), new TestOutcome.Ok("La paciente se presentó con")), new TestIndividual("female, no HPOs", femaleNoHPOs(), new TestOutcome.Error(() -> new PhenolRuntimeException("No HPO annotations"))), - new TestIndividual("unknown sex, no 4mo", + new TestIndividual("unknown sex, no 4yo", unknownSex4YearsOnset(), new TestOutcome.Ok("El paciente se presentó en la niñez con")) ); } From bd8fc304679ef0205e933d0ab1ead1a3855fa042 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Fri, 7 Jun 2024 17:48:57 +0200 Subject: [PATCH 38/39] copy phenopackets to output --- .../cmd/GbtTranslateBatchCommand.java | 14 ++++- .../phenopacket2prompt/output/PpktCopy.java | 53 +++++++++++++++++++ .../impl/dutch/DutchPromptGenerator.java | 5 -- .../impl/german/PpktIndividualGerman.java | 6 ++- .../spanish/PpktPhenotypicfeatureSpanish.java | 4 +- 5 files changed, 71 insertions(+), 11 deletions(-) create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktCopy.java diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java index 20c5315..67ad96f 100644 --- 
a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java @@ -9,6 +9,7 @@ import org.monarchinitiative.phenopacket2prompt.model.PhenopacketDisease; import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; import org.monarchinitiative.phenopacket2prompt.output.CorrectResult; +import org.monarchinitiative.phenopacket2prompt.output.PpktCopy; import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,6 +42,10 @@ public class GbtTranslateBatchCommand implements Callable { description = "path to translations file") private String translationsPath = "data/hp-international.obo"; + @CommandLine.Option(names = {"-o", "--outdir"}, + description = "path to outdir") + private String outdirname = "prompts"; + @CommandLine.Option(names = {"-d", "--dir"}, description = "Path to directory with JSON phenopacket files", required = true) private String ppktDir; @@ -61,10 +66,11 @@ public Integer call() throws Exception { return 1; } HpInternationalOboParser oboParser = new HpInternationalOboParser(translationsFile); + Map internationalMap = oboParser.getLanguageToInternationalMap(); LOGGER.info("Got {} translations", internationalMap.size()); List ppktFiles = getAllPhenopacketJsonFiles(); - createDir("prompts"); + createDir(outdirname); List correctResultList = outputPromptsEnglish(ppktFiles, hpo); // output all non-English languages here @@ -86,7 +92,11 @@ public Integer call() throws Exception { PromptGenerator italian = PromptGenerator.italian(internationalMap.get("it")); outputPromptsInternational(ppktFiles, hpo, "it", italian); resetOutput("finished"); - + // output original phenopackets + PpktCopy pcopy = new PpktCopy(new File(outdirname)); + for (var file : ppktFiles) { + pcopy.copyFile(file); + } // output file with correct diagnosis list 
outputCorrectResults(correctResultList); diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktCopy.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktCopy.java new file mode 100644 index 0000000..996b4d0 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktCopy.java @@ -0,0 +1,53 @@ +package org.monarchinitiative.phenopacket2prompt.output; + + +import org.monarchinitiative.phenol.base.PhenolRuntimeException; + +import java.io.*; + +/** + * Class to copy phenopackets from the input directory to an output directory so that we have all of the files + * used for an experiment in one place. + */ +public class PpktCopy { + + private final File ppkt_out_dir; + + + public PpktCopy(File outdirectory) { + ppkt_out_dir = new File(outdirectory + File.separator + "original_phenopackets"); + createDir(ppkt_out_dir); + } + + + + private void createDir(File path) { + if (! path.exists() ) { + boolean result = path.mkdir(); + if (! 
result) { + throw new PhenolRuntimeException("Could not create output directory at " + path); + } + } + } + + public void copyFile(File sourceLocation) { + try { + String fname = sourceLocation.getName(); + File outfile = new File(ppkt_out_dir + File.separator + fname); + + InputStream in = new FileInputStream(sourceLocation); + OutputStream out = new FileOutputStream(outfile); + + // Copy the bits from instream to outstream + byte[] buf = new byte[1024]; + int len; + while ((len = in.read(buf)) > 0) { + out.write(buf, 0, len); + } + in.close(); + out.close(); + } catch (IOException e) { + throw new PhenolRuntimeException(e.getMessage()); + } + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java index 6e89600..0fe24d4 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java @@ -1,6 +1,5 @@ package org.monarchinitiative.phenopacket2prompt.output.impl.dutch; -import org.monarchinitiative.phenol.ontology.data.Ontology; import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; @@ -15,9 +14,6 @@ public class DutchPromptGenerator implements PromptGenerator { - private final Ontology hpo; - - private final PPKtIndividualInfoGenerator ppktAgeSexGenerator; private final PhenopacketTextGenerator ppktTextGenerator; @@ -27,7 +23,6 @@ public class DutchPromptGenerator implements PromptGenerator { public DutchPromptGenerator(PpktPhenotypicFeatureGenerator pfgen) { - this.hpo = hpo; ppktAgeSexGenerator = new PpktIndividualDutch(); ppktTextGenerator = new PpktTextDutch(); this.ppktPhenotypicFeatureGenerator = 
pfgen; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java index ccbdc7d..8b134ce 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java @@ -1,6 +1,5 @@ package org.monarchinitiative.phenopacket2prompt.output.impl.german; -import com.sun.source.tree.BreakTree; import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenopacket2prompt.model.*; import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; @@ -449,7 +448,10 @@ public String atAge(PhenopacketAge ppktAge) { case "Neonatal onset" -> "In der neugeborenen Zeit"; case "Congenital onset" -> "Zum Zeitpunkt der Geburt"; case "Adult onset" -> "Im Erwachsenenalter"; - default-> String.format("TODO TODO el %s período", label.replace(" onset", "")); + case "Juvenile onset" -> "Im Jugendlichenalter"; + default-> { + throw new PhenolRuntimeException("No German translation for " + label); + } }; } else { return ""; // should never get here diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java index 7876582..629ccb0 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java @@ -95,9 +95,9 @@ public String formatFeatures(List ontologyTerms) { } else { String exclusion; if (excludedLabels.size() == 1) { - exclusion = String.format(" En cambio, se descartó %s.", 
getOxfordCommaList(excludedLabels)); + exclusion = String.format(". En cambio, se descartó %s.", getOxfordCommaList(excludedLabels)); } else { - exclusion = String.format(" En cambio, se descartaron %s.", getOxfordCommaList(excludedLabels)); + exclusion = String.format(". En cambio, se descartaron %s.", getOxfordCommaList(excludedLabels)); } return getOxfordCommaList(observedLabels) + exclusion; } From ce6633dc606f4bbad0de8c5c321bffc4eddc10a3 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Fri, 7 Jun 2024 17:51:55 +0200 Subject: [PATCH 39/39] version bump --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d78f0b4..89a5e67 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.monarchinitiative phenopacket2prompt - 0.3.16 + 0.4.0 phenopacket2prompt https://github.com/monarch-initiative/phenopacket2prompt