diff --git a/mkdocs.yml b/mkdocs.yml index 1b7d9af..8aa20a5 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -35,6 +35,7 @@ nav: - "Template": 'languages.md' - "English": "english.md" - Setup: "setup.md" + - Batch: "batch.md" plugins: - search diff --git a/pom.xml b/pom.xml index cd43cac..89a5e67 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.monarchinitiative phenopacket2prompt - 0.3.14 + 0.4.0 phenopacket2prompt https://github.com/monarch-initiative/phenopacket2prompt @@ -186,8 +186,8 @@ maven-compiler-plugin 3.8.1 - ${java.version} - ${java.version} + 21 + 21 diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java index c70d93d..67ad96f 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java @@ -9,6 +9,7 @@ import org.monarchinitiative.phenopacket2prompt.model.PhenopacketDisease; import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; import org.monarchinitiative.phenopacket2prompt.output.CorrectResult; +import org.monarchinitiative.phenopacket2prompt.output.PpktCopy; import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -23,6 +24,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.Callable; @CommandLine.Command(name = "batch", aliases = {"B"}, @@ -40,9 +42,16 @@ public class GbtTranslateBatchCommand implements Callable { description = "path to translations file") private String translationsPath = "data/hp-international.obo"; + @CommandLine.Option(names = {"-o", "--outdir"}, + description = "path to outdir") + private String outdirname = "prompts"; + @CommandLine.Option(names = {"-d", "--dir"}, description = "Path to 
directory with JSON phenopacket files", required = true) private String ppktDir; + private String currentLanguageCode = null; + private int currentCount; + @Override public Integer call() throws Exception { File hpJsonFile = new File(hpoJsonPath); @@ -57,19 +66,51 @@ public Integer call() throws Exception { return 1; } HpInternationalOboParser oboParser = new HpInternationalOboParser(translationsFile); + Map internationalMap = oboParser.getLanguageToInternationalMap(); LOGGER.info("Got {} translations", internationalMap.size()); List ppktFiles = getAllPhenopacketJsonFiles(); - createDir("prompts"); + createDir(outdirname); List correctResultList = outputPromptsEnglish(ppktFiles, hpo); // output all non-English languages here - PromptGenerator spanish = PromptGenerator.spanish(hpo, internationalMap.get("es")); + + // SPANISH + PromptGenerator spanish = PromptGenerator.spanish(internationalMap.get("es")); + resetOutput("es"); outputPromptsInternational(ppktFiles, hpo, "es", spanish); + + resetOutput("nl"); + PromptGenerator dutch = PromptGenerator.dutch(internationalMap.get("nl")); + outputPromptsInternational(ppktFiles, hpo, "nl", dutch); + // GERMAN + resetOutput("de"); + PromptGenerator german = PromptGenerator.german(internationalMap.get("de")); + outputPromptsInternational(ppktFiles, hpo, "de", german); + + // ITALIAN + resetOutput("it"); + PromptGenerator italian = PromptGenerator.italian(internationalMap.get("it")); + outputPromptsInternational(ppktFiles, hpo, "it", italian); + resetOutput("finished"); + // output original phenopackets + PpktCopy pcopy = new PpktCopy(new File(outdirname)); + for (var file : ppktFiles) { + pcopy.copyFile(file); + } + // output file with correct diagnosis list outputCorrectResults(correctResultList); return 0; } + private void resetOutput(String es) { + if (currentLanguageCode != null) { + System.out.printf("Finished writing %d phenopackets in %s\n", currentCount, currentLanguageCode); + } + currentLanguageCode = es; + 
currentCount = 0; + } + private void outputCorrectResults(List correctResultList) { File outfile = new File("prompts" + File.separator + "correct_results.tsv"); try (BufferedWriter bw = new BufferedWriter(new FileWriter(outfile))) { @@ -79,12 +120,12 @@ private void outputCorrectResults(List correctResultList) { } catch (IOException e) { e.printStackTrace(); } - System.out.printf("[INFO] Output a total of %d prompts in en and es.\n", correctResultList.size()); + System.out.printf("[INFO] Output a total of %d prompts in en, es, nl, de, and it.\n", correctResultList.size()); } - private String getFileName(String phenopacketID) { - return phenopacketID.replaceAll("[^\\w]", phenopacketID).replaceAll("/","_") + "-prompt.txt"; + private String getFileName(String phenopacketID, String languageCode) { + return phenopacketID.replaceAll("[^\\w]","_") + "_" + languageCode + "-prompt.txt"; } @@ -94,21 +135,28 @@ private void outputPromptsInternational(List ppktFiles, Ontology hpo, Stri createDir(dirpath); List diagnosisList = new ArrayList<>(); for (var f: ppktFiles) { - PpktIndividual individual = new PpktIndividual(f); + PpktIndividual individual = PpktIndividual.fromFile(f); List diseaseList = individual.getDiseases(); if (diseaseList.size() != 1) { - System.err.println(String.format("[ERROR] Got %d diseases for %s.\n", diseaseList.size(), individual.getPhenopacketId())); - continue; + String errmsg = String.format("[ERROR] Got %d diseases for %s.\n", diseaseList.size(), individual.getPhenopacketId()); + throw new PhenolRuntimeException(errmsg); } PhenopacketDisease pdisease = diseaseList.get(0); - String promptFileName = getFileName( individual.getPhenopacketId()); + String promptFileName = getFileName( individual.getPhenopacketId(), languageCode); String diagnosisLine = String.format("%s\t%s\t%s\t%s", pdisease.getDiseaseId(), pdisease.getLabel(), promptFileName, f.getAbsolutePath()); try { diagnosisList.add(diagnosisLine); String prompt = 
generator.createPrompt(individual); outputPrompt(prompt, promptFileName, dirpath); } catch (Exception e) { - e.printStackTrace(); + System.err.printf("[ERROR] Could not process %s: %s\n", promptFileName, e.getMessage()); + //e.printStackTrace(); + } + } + Set missing = generator.getMissingTranslations(); + if (! missing.isEmpty()) { + for (var m : missing) { + System.out.printf("[%s] Missing: %s\n", languageCode, m); } } } @@ -117,17 +165,17 @@ private void outputPromptsInternational(List ppktFiles, Ontology hpo, Stri private List outputPromptsEnglish(List ppktFiles, Ontology hpo) { createDir("prompts/en"); List correctResultList = new ArrayList<>(); - PromptGenerator generator = PromptGenerator.english(hpo); + PromptGenerator generator = PromptGenerator.english(); for (var f: ppktFiles) { - PpktIndividual individual = new PpktIndividual(f); + PpktIndividual individual = PpktIndividual.fromFile(f); List diseaseList = individual.getDiseases(); if (diseaseList.size() != 1) { - System.err.println(String.format("[ERROR] Got %d diseases for %s.\n", diseaseList.size(), individual.getPhenopacketId())); + System.err.printf("[ERROR] Got %d diseases for %s.\n", diseaseList.size(), individual.getPhenopacketId()); continue; } PhenopacketDisease pdisease = diseaseList.get(0); - String promptFileName = getFileName( individual.getPhenopacketId()); + String promptFileName = getFileName( individual.getPhenopacketId(), "en"); String diagnosisLine = String.format("%s\t%s\t%s\t%s", pdisease.getDiseaseId(), pdisease.getLabel(), promptFileName, f.getAbsolutePath()); try { String prompt = generator.createPrompt(individual); @@ -150,7 +198,8 @@ private void outputPrompt(String prompt, String promptFileName, String dir) { } catch (IOException e) { e.printStackTrace(); } - System.out.print("."); + System.out.printf("%s %d.\r", currentLanguageCode, currentCount); + currentCount++; } @@ -177,6 +226,9 @@ private List getAllPhenopacketJsonFiles() { for (File item : items) { if 
(item.isDirectory()) ppktDirectories.add(ppktDir+item.getName()); + else if (item.isFile() && item.getName().endsWith(".json")) { + ppktFiles.add(item); + } } for (var f: ppktDirectories) { File subdir = new File(f); diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java index 88e311e..478bf60 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GptTranslateCommand.java @@ -34,6 +34,9 @@ public class GptTranslateCommand implements Callable { @CommandLine.Option(names = {"-p", "--ppkt"}, description = "Path to JSON phenopacket file", required = true) private String ppkt; + @CommandLine.Option(names = {"-l", "--language"}, description = "Language code", defaultValue = "de") + private String languageCode; + @Override public Integer call() throws Exception { @@ -54,13 +57,31 @@ public Integer call() throws Exception { System.out.println(hpo.version().orElse("n/a")); - PromptGenerator generator = PromptGenerator.english(hpo); - PpktIndividual individual = new PpktIndividual(new File(ppkt)); + PromptGenerator generator = PromptGenerator.english(); + PpktIndividual individual = PpktIndividual.fromFile(new File(ppkt)); String prompt = generator.createPrompt(individual); System.out.println(prompt); - System.out.println("SPANISH"); - PromptGenerator spanish = PromptGenerator.spanish(hpo, internationalMap.get("es")); - prompt = spanish.createPrompt(individual); + switch (languageCode) { + case "de" -> { + PromptGenerator german = PromptGenerator.german(internationalMap.get("de")); + prompt = german.createPrompt(individual); + } + case "es" -> { + PromptGenerator spanish = PromptGenerator.spanish(internationalMap.get("es")); + prompt = spanish.createPrompt(individual); + } + case "nl" -> { + PromptGenerator dutch = 
PromptGenerator.dutch(internationalMap.get("nl")); + prompt = dutch.createPrompt(individual); + } + case "it" -> { + PromptGenerator italian = PromptGenerator.italian(internationalMap.get("it")); + prompt = italian.createPrompt(individual); + } + default -> prompt = "did not recognize language code " + languageCode; + } + + System.out.println(prompt); return 0; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java index 52a4824..e674af7 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/international/HpInternationalOboParser.java @@ -27,7 +27,7 @@ public class HpInternationalOboParser { * @return in this case "tr" */ public Optional getLanguage(String annots) { - final String translation = "translation:language=\"(\\w{2,2})\""; + final String translation = "translation:language=\"(\\w{2,3})\""; final Pattern pattern = Pattern.compile(translation); Matcher matcher = pattern.matcher(annots); if (matcher.find()) { @@ -53,7 +53,7 @@ public Optional getTranslation(String annots) { public HpInternationalOboParser(File file) { languageToInternationalMap = new HashMap<>(); String pattern = "id: (HP:\\d{7,7})"; - Set acronyms = Set.of("cs", "en", "es", "fr", "ja", "nl", "nna", "tr", "tw", "zh"); + Set acronyms = Set.of("cs", "en", "de", "dtp", "it", "es", "fr", "ja", "nl", "nna", "tr", "tw", "zh"); for (String acronym : acronyms) { languageToInternationalMap.put(acronym, new HpInternational(acronym)); } @@ -84,9 +84,13 @@ public HpInternationalOboParser(File file) { Optional opt = getLanguage(annots); if (opt.isPresent()) { String language = opt.get(); + if (! 
languageToInternationalMap.containsKey(language)) { + System.err.println("[ERROR] Could not find language \"" + language + "\""); + continue; + } languageToInternationalMap.get(language).addTerm(currentHpoTermId, hpoLabel); } else { - System.err.printf("[ERROR] Could not extract language for %s.", line); + System.err.printf("[ERROR] Could not extract language for %s.\n", line); } } @@ -98,13 +102,13 @@ public HpInternationalOboParser(File file) { } catch (IOException e) { e.printStackTrace(); } - for (String language : languageToInternationalMap.keySet()) { + /*for (String language : languageToInternationalMap.keySet()) { System.out.println(language); HpInternational international = languageToInternationalMap.get(language); for (var entry : international.getTermIdToLabelMap().entrySet()) { System.out.printf("\t%s: %s\n", entry.getKey().getValue(), entry.getValue()); } - } + }*/ } public Map getLanguageToInternationalMap() { diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/AgeNotSpecified.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/AgeNotSpecified.java index e94e62e..e2e9951 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/AgeNotSpecified.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/AgeNotSpecified.java @@ -1,6 +1,8 @@ package org.monarchinitiative.phenopacket2prompt.model; -public class AgeNotSpecified implements PhenopacketAge { +import java.util.Objects; + +public final class AgeNotSpecified implements PhenopacketAge { @Override public String age() { return ""; @@ -43,4 +45,18 @@ public int totalDays() { @Override public boolean specified() {return false; } + + @Override + public int hashCode() { + return Objects.hashCode(totalDays()); + } + + @Override + public boolean equals(Object obj) { + if (! 
(obj instanceof PhenopacketAge)) return false; + PhenopacketAge iso = (PhenopacketAge) obj; + return iso.totalDays() == totalDays(); + } + + } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/HpoOnsetAge.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/HpoOnsetAge.java index b8a487f..c89ea38 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/HpoOnsetAge.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/HpoOnsetAge.java @@ -3,10 +3,11 @@ import org.monarchinitiative.phenol.annotations.formats.hpo.HpoOnset; import org.monarchinitiative.phenol.ontology.data.TermId; +import java.util.Objects; import java.util.Optional; import java.util.Set; -public class HpoOnsetAge implements PhenopacketAge { +public final class HpoOnsetAge implements PhenopacketAge { private final TermId tid; private final String label; @@ -14,31 +15,38 @@ public class HpoOnsetAge implements PhenopacketAge { private final int totalDays; - /** One of Antenatal onset HP:0030674; Fetal onset HP:0011461; Late first trimester onset HP:0034199; - * Third trimester onset HP:0034197; Second trimester onset HP:0034198; Embryonal onset HP:0011460*/ - private final static Set fetalIds = Set.of(TermId.of(" HP:0030674"), TermId.of("HP:0011461"), TermId.of("HP:0034199"), + /** + * One of Antenatal onset HP:0030674; Fetal onset HP:0011461; Late first trimester onset HP:0034199; + * Third trimester onset HP:0034197; Second trimester onset HP:0034198; Embryonal onset HP:0011460 + */ + private final static Set fetalIds = Set.of(TermId.of(" HP:0030674"), TermId.of("HP:0011461"), TermId.of("HP:0034199"), TermId.of("HP:0034197"), TermId.of("HP:0034198"), TermId.of("HP:0011460*")); - /** Childhood onset */ + /** + * Childhood onset + */ private final static TermId childhoodOnset = TermId.of("HP:0011463"); private final static TermId juvenileOnset = TermId.of("HP:0003621"); - /** Infantile onset */ + /** + * Infantile onset + */ 
private final static TermId infantileOnset = TermId.of("HP:0003593"); - /** Congenital onset */ + /** + * Congenital onset + */ private final static TermId congenitalOnset = TermId.of("HP:0003577"); - public HpoOnsetAge(String id, String label) { this.tid = TermId.of(id); this.label = label; Optional opt = HpoOnset.fromTermId(tid); if (opt.isPresent()) { HpoOnset onset = opt.get(); - totalDays = (int) ( onset.start().days() /2+ onset.end().days()/2); + totalDays = (int) (onset.start().days() / 2 + onset.end().days() / 2); } else { totalDays = Integer.MAX_VALUE; } @@ -90,4 +98,36 @@ public int totalDays() { public TermId getTid() { return tid; } + + + public static HpoOnsetAge childhood() { + return new HpoOnsetAge(childhoodOnset.getValue(), "Childhood onset"); + } + + public static HpoOnsetAge juvenile() { + return new HpoOnsetAge(juvenileOnset.getValue(), "Juvenile onset"); + } + + + public static HpoOnsetAge infantile() { + return new HpoOnsetAge(infantileOnset.getValue(), "Infantile onset"); + } + + + public static HpoOnsetAge congenital() { + return new HpoOnsetAge(congenitalOnset.getValue(), "Congenital onset"); + } + + @Override + public int hashCode() { + return Objects.hashCode(totalDays()); + } + + @Override + public boolean equals(Object obj) { + if (! 
(obj instanceof PhenopacketAge)) return false; + PhenopacketAge iso = (PhenopacketAge) obj; + return iso.totalDays() == totalDays(); + } + } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java index 34e5781..990174c 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/Iso8601Age.java @@ -2,7 +2,9 @@ import org.monarchinitiative.phenol.base.PhenolRuntimeException; -public class Iso8601Age implements PhenopacketAge { +import java.util.Objects; + +public final class Iso8601Age implements PhenopacketAge { private final String iso8601; @@ -57,12 +59,14 @@ public int getDays() { @Override public String age() { StringBuilder sb = new StringBuilder(); - if (years > 0) { - return String.format("%d year-old", years); + if (years == 1) { + return "one year"; + } else if (years > 1) { + return String.format("%d years", years); } else if (months > 0) { - return String.format("%d month-old", months); + return String.format("%d months", months); } else { - return String.format("%d day-old", days); + return String.format("%d days", days); } } @@ -74,12 +78,12 @@ public PhenopacketAgeType ageType() { @Override public boolean isJuvenile() { - return years >= 10 && years < 18; + return years >= 6 && years < 18; } @Override public boolean isChild() { - return years >= 1 && years < 10; + return years >= 1 && years < 6; } @Override @@ -103,4 +107,17 @@ public boolean isCongenital() { public int totalDays() { return totalDays; } + + + @Override + public int hashCode() { + return Objects.hashCode(totalDays()); + } + + @Override + public boolean equals(Object obj) { + if (! 
(obj instanceof PhenopacketAge)) return false; + PhenopacketAge iso = (PhenopacketAge) obj; + return iso.totalDays() == totalDays(); + } } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PhenopacketAge.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PhenopacketAge.java index 32c0902..94c8498 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PhenopacketAge.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PhenopacketAge.java @@ -1,6 +1,6 @@ package org.monarchinitiative.phenopacket2prompt.model; -public interface PhenopacketAge { +public sealed interface PhenopacketAge permits AgeNotSpecified, HpoOnsetAge, Iso8601Age { String age(); PhenopacketAgeType ageType(); @@ -18,4 +18,5 @@ public interface PhenopacketAge { int totalDays(); default boolean specified() {return true; } + } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividual.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividual.java index a00aff6..7abd403 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividual.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividual.java @@ -17,14 +17,19 @@ import java.util.*; public class PpktIndividual { - final Logger LOGGER = LoggerFactory.getLogger(PpktIndividual.class); + private static final Logger LOGGER = LoggerFactory.getLogger(PpktIndividual.class); private final Phenopacket ppkt; private final String phenopacketId; - public PpktIndividual(File ppktJsonFile) { + public PpktIndividual(Phenopacket ppkt) { + this.ppkt = ppkt; + this.phenopacketId = ppkt.getId(); + } + + public static PpktIndividual fromFile(File ppktJsonFile) { JSONParser parser = new JSONParser(); try { Object obj = parser.parse(new FileReader(ppktJsonFile)); @@ -32,14 +37,20 @@ public PpktIndividual(File ppktJsonFile) { String phenopacketJsonString = jsonObject.toJSONString(); 
Phenopacket.Builder phenoPacketBuilder = Phenopacket.newBuilder(); JsonFormat.parser().merge(phenopacketJsonString, phenoPacketBuilder); - this.ppkt = phenoPacketBuilder.build(); + Phenopacket ppkt = phenoPacketBuilder.build(); + return new PpktIndividual(ppkt); } catch (IOException | ParseException e1) { LOGGER.error("Could not ingest phenopacket: {}", e1.getMessage()); throw new PhenolRuntimeException("Could not load phenopacket at " + ppktJsonFile); } - this.phenopacketId = ppkt.getId(); } + public static PpktIndividual fromPhenopacket(Phenopacket ppkt) { + return new PpktIndividual(ppkt); + } + + + public String getPhenopacketId() { return phenopacketId; } @@ -202,4 +213,8 @@ public Map> getSpecifiedAgePhenotypicFeatures } return ageToFeatureMap; } + + public int annotationCount() { + return ppkt.getPhenotypicFeaturesCount(); + } } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketIndividualInformationGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualInfoGenerator.java similarity index 87% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketIndividualInformationGenerator.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualInfoGenerator.java index 7506bb1..86a8414 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketIndividualInformationGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualInfoGenerator.java @@ -4,7 +4,7 @@ import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; -public interface PhenopacketIndividualInformationGenerator { +public interface PPKtIndividualInfoGenerator { String getIndividualDescription(PpktIndividual individual); diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketTextGenerator.java 
b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketTextGenerator.java index b850365..250b66c 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketTextGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PhenopacketTextGenerator.java @@ -1,7 +1,5 @@ package org.monarchinitiative.phenopacket2prompt.output; -import java.util.List; - public interface PhenopacketTextGenerator { @@ -9,27 +7,4 @@ public interface PhenopacketTextGenerator { String QUERY_HEADER(); - - /** - * @param items a list of HPO labels, e.g., X and Y and Z - * @return A string formatted as X, Y, and Z. - */ - default String getOxfordCommaList(List items, String andWord) { - if (items.size() == 2) { - // no comma if we just have two items. - // one item will work with the below code - String andWithSpace = String.format(" %s ", andWord); - return String.join(andWithSpace, items) + "."; - } - StringBuilder sb = new StringBuilder(); - String symList = String.join(", ", items); - int jj = symList.lastIndexOf(", "); - if (jj > 0) { - String andWithSpaceAndComma = String.format(", %s ", andWord); - symList = symList.substring(0, jj) + andWithSpaceAndComma + symList.substring(jj+2); - } - sb.append(symList).append("."); - return sb.toString(); - } - } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktCopy.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktCopy.java new file mode 100644 index 0000000..996b4d0 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktCopy.java @@ -0,0 +1,53 @@ +package org.monarchinitiative.phenopacket2prompt.output; + + +import org.monarchinitiative.phenol.base.PhenolRuntimeException; + +import java.io.*; + +/** + * Class to copy phenopackets from the input directory to an output directory so that we have all of the files + * used for an experiment in one place. 
+ */ +public class PpktCopy { + + private final File ppkt_out_dir; + + + public PpktCopy(File outdirectory) { + ppkt_out_dir = new File(outdirectory + File.separator + "original_phenopackets"); + createDir(ppkt_out_dir); + } + + + + private void createDir(File path) { + if (! path.exists() ) { + boolean result = path.mkdir(); + if (! result) { + throw new PhenolRuntimeException("Could not create output directory at " + path); + } + } + } + + public void copyFile(File sourceLocation) { + try { + String fname = sourceLocation.getName(); + File outfile = new File(ppkt_out_dir + File.separator + fname); + + InputStream in = new FileInputStream(sourceLocation); + OutputStream out = new FileOutputStream(outfile); + + // Copy the bits from instream to outstream + byte[] buf = new byte[1024]; + int len; + while ((len = in.read(buf)) > 0) { + out.write(buf, 0, len); + } + in.close(); + out.close(); + } catch (IOException e) { + throw new PhenolRuntimeException(e.getMessage()); + } + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java index 3f2b24a..c8df74c 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktPhenotypicFeatureGenerator.java @@ -3,11 +3,45 @@ import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; import java.util.List; +import java.util.Set; +import java.util.function.Predicate; public interface PpktPhenotypicFeatureGenerator { + String formatFeatures(List ontologyTerms); + + + default List getObservedFeaturesAsStr(List oterms) { + return oterms.stream() + .filter(Predicate.not(OntologyTerm::isExcluded)) + .map(OntologyTerm::getLabel) + .toList(); + } + + default List getExcludedFeaturesAsStr(List oterms) { + return oterms.stream() + 
.filter(OntologyTerm::isExcluded) + .map(OntologyTerm::getLabel) + .toList(); + } + + default List getObservedFeatures(List oterms) { + return oterms.stream() + .filter(Predicate.not(OntologyTerm::isExcluded)) + .toList(); + } + + default List getExcludedFeatures(List oterms) { + return oterms.stream() + .filter(OntologyTerm::isExcluded) + + .toList(); + } + + default Set getMissingTranslations() { + return Set.of(); + } - String formatFeatures( List ontologyTerms); } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java index 6603b38..7586ccd 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PromptGenerator.java @@ -1,16 +1,21 @@ package org.monarchinitiative.phenopacket2prompt.output; -import org.monarchinitiative.phenol.ontology.data.Ontology; import org.monarchinitiative.phenopacket2prompt.international.HpInternational; import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; import org.monarchinitiative.phenopacket2prompt.output.impl.english.EnglishPromptGenerator; +import org.monarchinitiative.phenopacket2prompt.output.impl.german.GermanPromptGenerator; +import org.monarchinitiative.phenopacket2prompt.output.impl.german.PpktPhenotypicfeatureGerman; import org.monarchinitiative.phenopacket2prompt.output.impl.spanish.*; +import org.monarchinitiative.phenopacket2prompt.output.impl.dutch.*; +import org.monarchinitiative.phenopacket2prompt.output.impl.italian.*; + import java.util.List; import java.util.Map; +import java.util.Set; public interface PromptGenerator { @@ -24,15 +29,31 @@ public interface 
PromptGenerator { String getVignetteAtAge(PhenopacketAge page, PhenopacketSex psex, List terms); - public static PromptGenerator english(Ontology ontology){ - return new EnglishPromptGenerator(ontology); + static PromptGenerator english(){ + return new EnglishPromptGenerator(); } - static PromptGenerator spanish(Ontology hpo, HpInternational international) { + static PromptGenerator spanish(HpInternational international) { PpktPhenotypicFeatureGenerator pfgen = new PpktPhenotypicfeatureSpanish(international); - return new SpanishPromptGenerator(hpo, pfgen); + return new SpanishPromptGenerator(pfgen); + } + + + static PromptGenerator dutch(HpInternational international) { + PpktPhenotypicFeatureGenerator pfgen = new PpktPhenotypicfeatureDutch(international); + return new DutchPromptGenerator(pfgen); + } + + static PromptGenerator german(HpInternational international) { + PpktPhenotypicFeatureGenerator pfgen = new PpktPhenotypicfeatureGerman(international); + return new GermanPromptGenerator(pfgen); + } + static PromptGenerator italian(HpInternational international) { + PpktPhenotypicFeatureGenerator pfgen = new PpktPhenotypicfeatureItalian(international); + return new ItalianPromptGenerator(pfgen); } + /** * The following structure should work for most other languages, but the function * can be overridden if necessary. 
@@ -59,7 +80,9 @@ default String createPrompt(PpktIndividual individual) { } - + default Set getMissingTranslations() { + return Set.of(); + } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java new file mode 100644 index 0000000..0fe24d4 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java @@ -0,0 +1,58 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.dutch; + +import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketTextGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator; + +import java.util.List; +import java.util.Set; + +public class DutchPromptGenerator implements PromptGenerator { + + private final PPKtIndividualInfoGenerator ppktAgeSexGenerator; + + private final PhenopacketTextGenerator ppktTextGenerator; + + private final PpktPhenotypicFeatureGenerator ppktPhenotypicFeatureGenerator; + + + + public DutchPromptGenerator(PpktPhenotypicFeatureGenerator pfgen) { + ppktAgeSexGenerator = new PpktIndividualDutch(); + ppktTextGenerator = new PpktTextDutch(); + this.ppktPhenotypicFeatureGenerator = pfgen; + } + + @Override + public String queryHeader() { + return ppktTextGenerator.QUERY_HEADER(); + } + + @Override + public String getIndividualInformation(PpktIndividual ppktIndividual) { + return 
this.ppktAgeSexGenerator.getIndividualDescription(ppktIndividual); + } + + @Override + public String formatFeatures(List ontologyTerms) { + return ppktPhenotypicFeatureGenerator.formatFeatures(ontologyTerms); + } + + @Override + public String getVignetteAtAge(PhenopacketAge page, PhenopacketSex psex, List terms) { + String ageString = this.ppktAgeSexGenerator.atAge(page); + String features = formatFeatures(terms); + return String.format("%s, %s presenteerde met %s", ageString, ppktAgeSexGenerator.heSheIndividual(psex), features); + } + + + @Override + public Set getMissingTranslations() { + return this.ppktPhenotypicFeatureGenerator.getMissingTranslations(); + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java new file mode 100644 index 0000000..ac0f553 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktIndividualDutch.java @@ -0,0 +1,523 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.dutch; + +import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.monarchinitiative.phenopacket2prompt.model.*; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +public class PpktIndividualDutch implements PPKtIndividualInfoGenerator { + + + /** + * Equivalent of "The clinical + * @param individual + * @return + */ + public String ageAndSexAtOnset(PpktIndividual individual) { + Optional ageOpt = individual.getAgeAtOnset(); + return ""; + } + + + + + public String ageAndSexAtLastExamination(PpktIndividual individual) { + PhenopacketSex psex = individual.getSex(); + Optional ageOpt = individual.getAgeAtLastExamination(); + if (ageOpt.isEmpty()) { + ageOpt = individual.getAgeAtOnset(); + } + String sex; + switch (psex) { + 
case FEMALE -> sex = "zij"; + case MALE -> sex = "hij"; + default -> sex = "de persoon"; + }; + + if (ageOpt.isEmpty()) { + return sex; + } + PhenopacketAge age = ageOpt.get(); + if (age.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoage = (Iso8601Age) age; + int y = isoage.getYears(); + int m = isoage.getMonths(); + int d = isoage.getDays(); + if (psex.equals(PhenopacketSex.FEMALE)) { + if (y > 17) { + return String.format("vrouw van %d jaar oud", y); + } else if (y > 9) { + return String.format("adolescente vrouw van %d jaar oud", y); + } else if (y > 0) { + return String.format("meisje van %d jaar oud", y); + } else if (m>0) { + return String.format("baby van %d maanden oud", m); + } else { + return String.format("pasgeboren vrouwelijke baby van %d dagen oud", d); + } + } + } else { + // age is an HPO onset term, we do not have an exact date + } + if (age.isChild()) { + return switch (psex) { + case FEMALE -> "meisje"; + case MALE -> "jongetje"; + default -> "kind"; // difficult to be gender neutral + }; + } else if (age.isCongenital()) { + return switch (psex) { + case FEMALE -> "pasgeboren meisje"; + case MALE -> "pasgeboren jongetje"; + default -> "pasgeborene"; + }; + } else if (age.isFetus()) { + return switch (psex) { + case FEMALE -> "vrouwelijke foetus"; + case MALE -> "mannelijke foetus"; + default -> "foetus"; + }; + } else if (age.isInfant()) { + return switch (psex) { + case FEMALE -> "vrouwelijke baby"; + case MALE -> "mannelijke baby"; + default -> "baby"; + }; + } else { + return switch (psex) { + case FEMALE -> "vrouw"; + case MALE -> "man"; + default -> "volwassene"; + }; + } + } + + + private String individualName(PpktIndividual individual) { + PhenopacketSex psex = individual.getSex(); + Optional ageOpt = individual.getAgeAtLastExamination(); + if (ageOpt.isEmpty()) { + ageOpt = individual.getAgeAtOnset(); + } + if (ageOpt.isEmpty()) { + return switch (psex) { + case FEMALE -> "vrouw"; + case MALE -> "man"; + default 
-> "individu"; + }; + } + PhenopacketAge age = ageOpt.get();; + if (age.isChild()) { + return switch (psex) { + case FEMALE -> "meisje"; + case MALE -> "jongetje"; + default -> "kind"; + }; + } else if (age.isCongenital()) { + return switch (psex) { + case FEMALE -> "pasgeboren vrouwelijke baby"; + case MALE -> "pasgeboren mannelijke baby"; + default -> "pasgeborene"; + }; + } else if (age.isFetus()) { + return switch (psex) { + case FEMALE -> "vrouwelijke foetus"; + case MALE -> "mannelijke foetus"; + default -> "foetus"; + }; + } else if (age.isInfant()) { + return switch (psex) { + case FEMALE -> "vrouwelijke baby"; + case MALE -> "mannelijke baby"; + default -> "baby"; + }; + } else { + return switch (psex) { + case FEMALE -> "vrouw"; + case MALE -> "man"; + default -> "individu"; + }; + } + } + + + /* @Override + public String individualWithAge(PhenopacketAge ppktAge) { + if (ppktAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + return ppktAge.age() + " old"; + } else if (ppktAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + String label = ppktAge.age(); // something like "Infantile onset" + return switch (label) { + case "Infantile onset" -> "bebé"; + case "Childhood onset" -> "niño"; + case "Neonatal onset" -> "neonate"; + case "Congenital onset" -> "recién nacido"; + case "Adult onset" -> "adulto"; + default-> String.format("During the %s", label.replace(" onset", "")); + }; + } else { + return ""; // should never get here + } + } +*/ + + private String atIsoAgeExact(PhenopacketAge ppktAge) { + Iso8601Age iso8601Age = (Iso8601Age) ppktAge; + int y = iso8601Age.getYears(); + int m = iso8601Age.getMonths(); + int d = iso8601Age.getDays(); + + if (y > 10) { + return String.format("%d jaar oud", y); + } else if (y > 0) { + if (m > 1) { + return String.format("%d jaar en %d maanden oud", y, m); + } else if (m == 1) { + return String.format("%d jaar en één maand oud", y); + } else { + return String.format("%d jaar oud", y); + } + } 
else if (m>0) { + return String.format("%d maanden en %d dagen oud", m, d); + } else { + return String.format("%d dagen oud", d); + } + } + + + @Override + public String getIndividualDescription(PpktIndividual individual) { + Optional lastExamOpt = individual.getAgeAtLastExamination(); + Optional onsetOpt = individual.getAgeAtOnset(); + PhenopacketSex psex = individual.getSex(); + if (lastExamOpt.isPresent() && onsetOpt.isPresent()) { + return onsetAndLastEncounterAvailable(psex, lastExamOpt.get(), onsetOpt.get()); + } else if (lastExamOpt.isPresent()) { + return lastEncounterAvailable(psex, lastExamOpt.get()); + } else if (onsetOpt.isPresent()) { + return onsetAvailable(psex, onsetOpt.get()); + } else { + return ageNotAvailable(psex); + } + } + + + private String iso8601ToYearMonth(Iso8601Age iso8601Age) { + if (iso8601Age.getMonths() == 0) { + return String.format("van %d jaar oud", iso8601Age.getYears()); + } else { + return String.format("van %d jaar en %d maanden", iso8601Age.getYears(), iso8601Age.getMonths()); + } + } + + private String iso8601ToMonthDay(Iso8601Age iso8601Age) { + int m = iso8601Age.getMonths(); + int d = iso8601Age.getDays(); + if (m == 0) { + return String.format("van %d dagen oud", d); + } else if (d>0){ + return String.format("van %d maanden en %d dagen oud", m, d); + } else { + return String.format("van %d maanden oud", m); + } + } + + /** + * Create a phrase such as "at the age of 7 years, 4 months, and 2 days" + * Leave out the months and days if they are zero. 
+     * @param isoAge exact ISO 8601 age whose years, months and days are rendered
+     * @return Dutch phrase such as "op de leeftijd van 7 jaar, 4 maanden en 2 dagen",
+     *         or "als pasgeborene" when years, months and days are all zero
+     */
+    private String iso8601AtAgeOf(Iso8601Age isoAge) {
+        List<String> components = new ArrayList<>();
+
+        // Each component is a bare quantity; no trailing "oud", so that the
+        // components can be combined after "op de leeftijd van".
+        if (isoAge.getYears() > 1) {
+            components.add(String.format("%d jaar", isoAge.getYears()));
+        } else if (isoAge.getYears() == 1) {
+            components.add("één jaar");
+        }
+        if (isoAge.getMonths() > 1) {
+            components.add(String.format("%d maanden", isoAge.getMonths()));
+        } else if (isoAge.getMonths() == 1) {
+            components.add("één maand");
+        }
+        if (isoAge.getDays() > 1) {
+            components.add(String.format("%d dagen", isoAge.getDays()));
+        } else if (isoAge.getDays() == 1) {
+            components.add("één dag");
+        }
+        if (components.isEmpty()) {
+            return "als pasgeborene";
+        } else if (components.size() == 1) {
+            return "op de leeftijd van " + components.get(0);
+        } else if (components.size() == 2) {
+            return "op de leeftijd van " + components.get(0) + " en " + components.get(1);
+        } else {
+            // Three components: comma-separated list closed with "en"
+            // (previously the first separator was a full stop).
+            return "op de leeftijd van " + components.get(0) + ", " + components.get(1) +
+                    " en " + components.get(2);
+        }
+    }
+
+    /**
+     * Render an HPO onset category as a Dutch time phrase.
+     * Categories are tested in the order fetus, congenital, infant, child, juvenile;
+     * anything else is rendered as adult onset.
+     * @param hpoOnsetTermAge onset expressed as an HPO onset term
+     * @return Dutch phrase such as "als kind"
+     */
+    private String onsetTermAtAgeOf(HpoOnsetAge hpoOnsetTermAge) {
+        if (hpoOnsetTermAge.isFetus()) {
+            return "in de foetale periode";
+        } else if (hpoOnsetTermAge.isCongenital()) {
+            return "in de neonatale periode";
+        } else if (hpoOnsetTermAge.isInfant()) {
+            return "als baby";
+        } else if (hpoOnsetTermAge.isChild()) {
+            return "als kind";
+        } else if (hpoOnsetTermAge.isJuvenile()) {
+            return "als adolescent";
+        } else {
+            return "als volwassene";
+        }
+    }
+
+
+    private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8601Age) {
+        int y = iso8601Age.getYears();
+        int m = iso8601Age.getMonths();
+        int d = iso8601Age.getDays();
+        // if older
+        if (y>17) {
+            return switch (psex) {
+                case FEMALE -> String.format("vrouw van %d jaar oud", y);
+                case MALE -> String.format("man van %d jaar oud", y);
+                default -> String.format("persoon van %d jaar oud", y);
+            };
+        } else if (y>9) {
+            return switch (psex) {
+                case FEMALE -> 
String.format("vrouwelijke adolescent van %d jaar oud", y); + case MALE -> String.format("mannelijke adolescent van %d jaar oud", y); + default -> String.format("adolescent van %d jaar oud", y); + }; + } else if (y>0) { + return switch (psex) { + case FEMALE -> String.format("meisje %s", iso8601ToYearMonth(iso8601Age)); + case MALE -> String.format("jongetje %s", iso8601ToYearMonth(iso8601Age)); + default -> String.format("kind %s", iso8601ToYearMonth(iso8601Age)); + }; + } else if (m>0 || d> 0) { + return switch (psex) { + case FEMALE -> String.format("vrouwelijke baby %s", iso8601ToMonthDay(iso8601Age)); + case MALE -> String.format("mannelijke baby %s", iso8601ToMonthDay(iso8601Age)); + default -> String.format("baby %s", iso8601ToMonthDay(iso8601Age)); + }; + } else { + return switch (psex) { + case FEMALE -> "pasgeboren meisje"; + case MALE -> "pasgeboren jongetje"; + default -> "pasgeborene"; + }; + } + } + + private String hpoOnsetIndividualDescription(PhenopacketSex psex, HpoOnsetAge hpoOnsetTermAge) { + if (hpoOnsetTermAge.isFetus()) { + return switch (psex) { + case FEMALE -> "vrouwelijke foetus"; + case MALE -> "mannelijke foetus"; + default -> "foetus"; + }; + } else if (hpoOnsetTermAge.isCongenital()) { + return switch (psex) { + case FEMALE -> "pasgeboren meisje"; + case MALE -> "pasgeboren jongetje"; + default -> "pasgeborene"; + }; + } else if (hpoOnsetTermAge.isInfant()) { + return switch (psex) { + case FEMALE -> "vrouwelijke baby"; + case MALE -> "mannelijke baby"; + default -> "baby"; + }; + } else if (hpoOnsetTermAge.isChild()) { + return switch (psex) { + case FEMALE -> "meisje"; + case MALE -> "jongetje"; + default -> "kind"; + }; + } else if (hpoOnsetTermAge.isJuvenile()) { + return switch (psex) { + case FEMALE -> "vrouwelijke adolescent"; + case MALE -> "mannelijke adolescent"; + default -> "adolescent"; + }; + }else { + return switch (psex) { + case FEMALE -> "vrouw"; + case MALE -> "man"; + default -> "volwassene"; + }; + } + } + + /** + 
* A sentence such as The proband was a 39-year old woman who presented at the age of 12 years with + * HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. This method returns the phrase that ends with "with" + * El sujeto era un niño de 1 año y 10 meses que se presentó como recién nacido con un filtrum largo. + * @param psex + * @param lastExamAge + * @param onsetAge + * @return + */ + private String onsetAndLastEncounterAvailable(PhenopacketSex psex, PhenopacketAge lastExamAge, PhenopacketAge onsetAge) { + String individualDescription; + String onsetDescription; + if (lastExamAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoAge = (Iso8601Age) lastExamAge; + individualDescription = iso8601individualDescription(psex, isoAge); + } else if (lastExamAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) lastExamAge; + individualDescription = hpoOnsetIndividualDescription(psex,hpoOnsetTermAge); + } else { + // should never happen + throw new PhenolRuntimeException("Did not recognize last exam age type " + lastExamAge.ageType()); + } + if (onsetAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoAge = (Iso8601Age) onsetAge; + onsetDescription = iso8601AtAgeOf(isoAge); + } else if (onsetAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) onsetAge; + onsetDescription = onsetTermAtAgeOf(hpoOnsetTermAge); + } else { + // should never happen + throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); + } + return String.format("De proband was een %s die %s presenteerde met", individualDescription, onsetDescription); + } + + + /** + * Age at last examination available but age of onset not available + * The proband was a 39-year old woman who presented with HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. 
+ * @param psex + * @param lastExamAge + */ + private String lastEncounterAvailable(PhenopacketSex psex, PhenopacketAge lastExamAge) { + String individualDescription; + if (lastExamAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoAge = (Iso8601Age) lastExamAge; + individualDescription = iso8601individualDescription(psex, isoAge); + } else if (lastExamAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) lastExamAge; + individualDescription = hpoOnsetIndividualDescription(psex,hpoOnsetTermAge); + } else { + // should never happen + throw new PhenolRuntimeException("Did not recognize last exam age type " + lastExamAge.ageType()); + } + return String.format("De proband was een %s die presenteerde met", individualDescription); + } + + /** + * Age at last examination not available but age of onset available + * The proband presented at the age of 12 years with HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. 
+     * @param psex sex of the proband (not used in the generated phrase)
+     * @param onsetAge age of onset, either an ISO 8601 age or an HPO onset term
+     * @return Dutch phrase ending in "met", to be followed by the list of features
+     * @throws PhenolRuntimeException if the age type is neither ISO 8601 nor HPO onset
+     */
+    private String onsetAvailable(PhenopacketSex psex, PhenopacketAge onsetAge) {
+        String onsetDescription;
+        if (onsetAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) {
+            Iso8601Age isoAge = (Iso8601Age) onsetAge;
+            onsetDescription = iso8601AtAgeOf(isoAge);
+        } else if (onsetAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) {
+            HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) onsetAge;
+            onsetDescription = onsetTermAtAgeOf(hpoOnsetTermAge);
+        } else {
+            // should never happen
+            throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType());
+        }
+        // One placeholder, one argument (the description was previously passed twice).
+        return String.format("De proband presenteerde %s met", onsetDescription);
+    }
+
+    /**
+     * Opening phrase used when neither age of onset nor age at last examination is known.
+     * @param psex sex of the proband
+     * @return Dutch phrase ending in "met"
+     */
+    private String ageNotAvailable(PhenopacketSex psex) {
+        return switch (psex) {
+            case FEMALE -> "De proband was een vrouw die presenteerde met";
+            case MALE -> "De proband was een man die presenteerde met";
+            default -> "De proband presenteerde met";
+        };
+    }
+
+    @Override
+    public String heSheIndividual(PhenopacketSex psex) {
+        return switch (psex) {
+            case FEMALE -> "zij";
+            case MALE -> "hij";
+            default -> "de persoon";
+        };
+    }
+
+    @Override
+    public String atAge(PhenopacketAge ppktAge) {
+        if (ppktAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) {
+            return "Op de leeftijd van " + atIsoAgeExact(ppktAge);
+        } else if (ppktAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) {
+            String label = ppktAge.age(); // something like "Infantile onset"
+            return switch (label) {
+                case "Infantile onset" -> "Tijdens de infantiele periode";
+                case "Childhood onset" -> "Tijdens de jeugd";
+                case "Neonatal onset" -> "Tijdens de neonatale periode";
+                case "Congenital onset" -> "Bij geboorte";
+                case "Adult onset" -> "Op volwassen leeftijd";
+                // NOTE(review): unlisted onset labels are inserted untranslated (English label minus " onset")
+                default-> String.format("Tijdens de %s periode", label.replace(" onset", ""));
+            };
+        } else {
+            return ""; // should never get here
+        }
+    }
+
+    // @Override
+    public String 
ppktSex(PpktIndividual individual) { + PhenopacketSex psex = individual.getSex(); + Optional ageOpt = individual.getAgeAtLastExamination(); + if (ageOpt.isEmpty()) { + ageOpt = individual.getAgeAtOnset(); + } + if (ageOpt.isEmpty()) { + return switch (psex) { + case FEMALE -> "vrouw"; + case MALE -> "man"; + default -> "individu"; + }; + } + PhenopacketAge age = ageOpt.get();; + if (age.isChild()) { + return switch (psex) { + case FEMALE -> "meisje"; + case MALE -> "jongetje"; + default -> "kind"; + }; + } else if (age.isCongenital()) { + return switch (psex) { + case FEMALE -> "vrouwelijke pasgeborene"; + case MALE -> "mannelijke pasgeborene"; + default -> "pasgeborene"; + }; + } else if (age.isFetus()) { + return switch (psex) { + case FEMALE -> "vrouwelijke foetus"; + case MALE -> "mannelijke foetus"; + default -> "foetus"; + }; + } else if (age.isInfant()) { + return switch (psex) { + case FEMALE -> "vrouwelijke baby"; + case MALE -> "mannelijke baby"; + default -> "baby"; + }; + } else { + return switch (psex) { + case FEMALE -> "vrouw"; + case MALE -> "man"; + default -> "individu"; + }; + } + } + + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktPhenotypicfeatureDutch.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktPhenotypicfeatureDutch.java new file mode 100644 index 0000000..5e635fd --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktPhenotypicfeatureDutch.java @@ -0,0 +1,93 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.dutch; + +import org.monarchinitiative.phenopacket2prompt.international.HpInternational; +import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; +import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; + +import java.util.*; +import java.util.function.Predicate; + +public class PpktPhenotypicfeatureDutch implements PpktPhenotypicFeatureGenerator { + + 
private final HpInternational dutch;
+    /** Terms for which no Dutch label was found, each formatted as " label (term id)". */
+    private final Set<String> missingTranslations;
+
+    public PpktPhenotypicfeatureDutch(HpInternational international) {
+        dutch = international;
+        missingTranslations = new HashSet<>();
+    }
+
+
+
+    /**
+     * Look up the Dutch label of each term.
+     * Terms without a translation are left out of the result and recorded in
+     * {@link #missingTranslations}.
+     * @param ontologyTerms terms to translate
+     * @return Dutch labels of the terms that have a translation, in input order
+     */
+    private List<String> getTranslations(List<OntologyTerm> ontologyTerms) {
+        List<String> labels = new ArrayList<>();
+        for (var term: ontologyTerms) {
+            Optional<String> opt = dutch.getLabel(term.getTid());
+            if (opt.isPresent()) {
+                labels.add(opt.get());
+            } else {
+                String missing = String.format(" %s (%s)", term.getLabel(), term.getTid().getValue());
+                missingTranslations.add(missing);
+            }
+        }
+        return labels;
+    }
+
+
+    /**
+     * Join labels into a Dutch enumeration: "a", "a en b", "a, b en c".
+     * The list is assembled from the individual items, so a label that itself
+     * contains ", " is never split.
+     * @param items labels to join
+     * @return the joined enumeration, or an empty string for an empty list
+     */
+    private String getOxfordCommaList(List<String> items) {
+        if (items.isEmpty()) {
+            return "";
+        }
+        if (items.size() == 1) {
+            return items.get(0);
+        }
+        int last = items.size() - 1;
+        // for two items this yields "a en b" (no comma)
+        return String.join(", ", items.subList(0, last)) + " en " + items.get(last);
+    }
+
+    /**
+     * Format observed and excluded features as Dutch text that continues a sentence
+     * ending in "presenteerde met".
+     * Only terms that have a Dutch translation are included.
+     * @param ontologyTerms observed and excluded terms of one encounter
+     * @return Dutch text listing the observed features and, where present, the excluded ones
+     */
+    @Override
+    public String formatFeatures(List<OntologyTerm> ontologyTerms) {
+        List<OntologyTerm> observedTerms = ontologyTerms.stream()
+                .filter(Predicate.not(OntologyTerm::isExcluded))
+                .toList();
+        List<String> observedLabels = getTranslations(observedTerms);
+        List<OntologyTerm> excludedTerms = ontologyTerms.stream()
+                .filter(OntologyTerm::isExcluded).toList();
+        List<String> excludedLabels = getTranslations(excludedTerms);
+        if (observedLabels.isEmpty() && excludedLabels.isEmpty()) {
+            // Reached when there are no terms or when none of them has a translation.
+            return "geen fenotypische afwijkingen.";
+        } else if (excludedLabels.isEmpty()) {
+            return getOxfordCommaList(observedLabels) + ". ";
+        } else if (observedLabels.isEmpty()) {
+            // NOTE(review): capitalisation of "dus"/"Dus" differs between the two
+            // branches below; confirm the intended wording with a Dutch speaker.
+            if (excludedLabels.size() > 1) {
+                return String.format("dus %s zijn uitgesloten.", getOxfordCommaList(excludedLabels));
+            } else {
+                return String.format("Dus %s werd uitgesloten.",excludedLabels.getFirst());
+            }
+        } else {
+            String exclusion;
+            if (excludedLabels.size() == 1) {
+                exclusion = String.format(". %s werd uitgesloten.", getOxfordCommaList(excludedLabels));
+            } else {
+                exclusion = String.format(". %s zijn uitgesloten.", getOxfordCommaList(excludedLabels));
+            }
+            return getOxfordCommaList(observedLabels) + exclusion;
+        }
+    }
+
+    /**
+     * @return the terms for which no Dutch label was found so far
+     */
+    @Override
+    public Set<String> getMissingTranslations() {
+        return missingTranslations;
+    }
+}
diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktTextDutch.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktTextDutch.java
new file mode 100644
index 0000000..82c61e6
--- /dev/null
+++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/PpktTextDutch.java
@@ -0,0 +1,23 @@
+package org.monarchinitiative.phenopacket2prompt.output.impl.dutch;
+
+import org.monarchinitiative.phenopacket2prompt.output.PhenopacketTextGenerator;
+
+public class PpktTextDutch implements PhenopacketTextGenerator {
+
+    @Override
+    public String QUERY_HEADER() {
+        return """
+Ik voer een experiment uit op basis van een klinisch casusrapport om te zien hoe jouw diagnoses zich verhouden tot die van menselijke experts. Ik ga je een deel van een medische casus geven. Je probeert geen patiënten te behandelen. In dit geval ben je “Dr. GPT-4”, een AI-taalmodel dat een diagnose stelt. Hier zijn enkele richtlijnen. Ten eerste bestaat er één definitieve diagnose, en het is een diagnose waarvan tegenwoordig bekend is dat deze ook bij mensen voorkomt. 
De diagnose wordt bijna altijd bevestigd door een soort genetische test, hoewel in zeldzame gevallen, wanneer een dergelijke test niet bestaat voor een diagnose, de diagnose in plaats daarvan kan worden gesteld op basis van gevalideerde klinische criteria of zeer zelden alleen maar kan worden bevestigd door de mening van deskundigen. Nadat je de casus hebt gelezen, wil ik dat je een differentiële diagnose geeft met een lijst met kandidaat-diagnoses, gerangschikt op waarschijnlijkheid, te beginnen met de meest waarschijnlijke kandidaat. Elke kandidaat moet worden gespecificeerd met de ziektenaam. Als de eerste kandidaat bijvoorbeeld het Branchio-oculofaciaal syndroom is en de tweede cystische fibrose, geef het dan zo in het Engels weer: + +1. Branchiooculofacial syndrome +2. Cystic fibrosis + +Deze lijst moet zoveel diagnoses bevatten als je redelijk acht. +Je hoeft je redenering niet uit te leggen, vermeld alleen de diagnoses. +Ik heb je deze instructies in het Nederlands gegeven, maar ik zou graag willen dat je je antwoord alleen in het Engels geeft. 
+ +Hier is het geval: +"""; + } + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGenerator.java index 905bd5e..765be68 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGenerator.java @@ -1,6 +1,5 @@ package org.monarchinitiative.phenopacket2prompt.output.impl.english; -import org.monarchinitiative.phenol.ontology.data.Ontology; import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; @@ -11,20 +10,18 @@ public class EnglishPromptGenerator implements PromptGenerator { - private final Ontology hpo; - private final PhenopacketIndividualInformationGenerator ppktAgeGenerator; + private final PPKtIndividualInfoGenerator ppktAgeGenerator; private final PhenopacketTextGenerator ppktTextGenerator; private final PpktPhenotypicFeatureGenerator ppktPhenotypicFeatureGenerator; - public EnglishPromptGenerator(Ontology hpo){ - this.hpo = hpo; + public EnglishPromptGenerator(){ ppktAgeGenerator = new PpktIndividualEnglish(); ppktTextGenerator = new PpktTextEnglish(); - this.ppktPhenotypicFeatureGenerator = new PpktPhenotypicfeatureEnglish(); + this.ppktPhenotypicFeatureGenerator = new PpktPhenotypicFeatureEnglish(); } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglish.java index 75bac66..90ec3cf 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglish.java +++ 
b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglish.java @@ -2,13 +2,13 @@ import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenopacket2prompt.model.*; -import org.monarchinitiative.phenopacket2prompt.output.PhenopacketIndividualInformationGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; import java.util.ArrayList; import java.util.List; import java.util.Optional; -public class PpktIndividualEnglish implements PhenopacketIndividualInformationGenerator { +public class PpktIndividualEnglish implements PPKtIndividualInfoGenerator { public PpktIndividualEnglish() { @@ -16,6 +16,9 @@ public PpktIndividualEnglish() { public String getIndividualDescription(PpktIndividual individual) { + if (individual.annotationCount() == 0) { + throw new PhenolRuntimeException("No HPO annotations"); + } Optional lastExamOpt = individual.getAgeAtLastExamination(); Optional onsetOpt = individual.getAgeAtOnset(); PhenopacketSex psex = individual.getSex(); @@ -60,7 +63,7 @@ private String iso8601ToMonthDay(Iso8601Age iso8601Age) { } else if (d>0){ return String.format("%d-month, %d-day old", m, d); } else { - return String.format("%d-month old", m, d); + return String.format("%d-month old", m); } } @@ -73,20 +76,17 @@ private String iso8601ToMonthDay(Iso8601Age iso8601Age) { private String iso8601AtAgeOf(Iso8601Age isoAge) { List components = new ArrayList<>(); - if (isoAge.getYears()>1) { - components.add(String.format("%d years", isoAge.getYears())); - } else if (isoAge.getYears() == 1) { - components.add("1 year"); + if (isoAge.getYears()>0) { + String ystring = isoAge.getYears() == 1 ? 
"year" : "years"; + components.add(String.format("%d %s", isoAge.getYears(), ystring)); } - if (isoAge.getMonths() > 1) { - components.add(String.format("%d months", isoAge.getMonths())); - } else if (isoAge.getMonths() == 1) { - components.add("1 month"); + if (isoAge.getMonths() > 0) { + String mstring = isoAge.getMonths() == 1 ? "month" : "months"; + components.add(String.format("%d %s", isoAge.getMonths(), mstring)); } - if (isoAge.getDays()>1) { - components.add(String.format("%d days", isoAge.getDays())); - } else if (isoAge.getDays()==1) { - components.add("1 day"); + if (isoAge.getDays()>0) { + String dstring = isoAge.getDays() == 1 ? "day" : "days"; + components.add(String.format("%d %s", isoAge.getDays(), dstring)); } if (components.isEmpty()) { return "as a newborn"; @@ -104,7 +104,7 @@ private String onsetTermAtAgeOf(HpoOnsetAge hpoOnsetTermAge) { if (hpoOnsetTermAge.isFetus()) { return "in the fetal period"; } else if (hpoOnsetTermAge.isCongenital()) { - return "as a newborn"; + return "at birth"; } else if (hpoOnsetTermAge.isInfant()) { return "as an infant"; } else if (hpoOnsetTermAge.isChild()) { @@ -142,9 +142,9 @@ private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8 }; } else if (m>0 || d> 0) { return switch (psex) { - case FEMALE -> String.format("%s baby girl", iso8601ToMonthDay(iso8601Age)); - case MALE -> String.format("\"%s baby boy", iso8601ToMonthDay(iso8601Age)); - default -> String.format("%s baby", iso8601ToMonthDay(iso8601Age)); + case FEMALE -> String.format("%s female infant", iso8601ToMonthDay(iso8601Age)); + case MALE -> String.format("%s male infant", iso8601ToMonthDay(iso8601Age)); + default -> String.format("%s infant", iso8601ToMonthDay(iso8601Age)); }; } else { return switch (psex) { diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicfeatureEnglish.java 
b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglish.java similarity index 60% rename from src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicfeatureEnglish.java rename to src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglish.java index 7bc0503..683afbd 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicfeatureEnglish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglish.java @@ -4,14 +4,13 @@ import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; import java.util.List; -import java.util.function.Predicate; -public class PpktPhenotypicfeatureEnglish implements PpktPhenotypicFeatureGenerator { +public class PpktPhenotypicFeatureEnglish implements PpktPhenotypicFeatureGenerator { private String getOxfordCommaList(List items) { if (items.size() == 1) { - return items.get(0); + return items.getFirst(); } if (items.size() == 2) { // no comma if we just have two items. @@ -29,24 +28,23 @@ private String getOxfordCommaList(List items) { /** * format features * The proband was a 39-year old woman who presented at the age of 12 years with HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. + * The patient presented with [list of symptoms]. However, [excluded symptoms] were not observed." 
*/ @Override public String formatFeatures(List ontologyTerms) { - List observed = ontologyTerms.stream() - .filter(Predicate.not(OntologyTerm::isExcluded)) - .map(OntologyTerm::getLabel).toList(); - List excluded = ontologyTerms.stream() - .filter(OntologyTerm::isExcluded) - .map(OntologyTerm::getLabel).toList(); + List observed = getObservedFeaturesAsStr(ontologyTerms); + List excluded = getExcludedFeaturesAsStr(ontologyTerms); if (observed.isEmpty() && excluded.isEmpty()) { - return "no phenotypic abnormalities"; // should never happen, actually! + return "no phenotypic abnormalities."; // should never happen, actually! } else if (excluded.isEmpty()) { return getOxfordCommaList(observed) + ". "; } else if (observed.isEmpty()) { - return "exclusion of " + getOxfordCommaList(excluded) + "."; + return "the following manifestations that were excluded: " + getOxfordCommaList(excluded) + ". "; } else { - String exclusion = String.format("%s %s excluded.", getOxfordCommaList(excluded), excluded.size() > 1 ? " were" : "was"); - return getOxfordCommaList(observed) + ", whereby " + exclusion; + String exclusion = String.format("However, %s %s excluded.", getOxfordCommaList(excluded), excluded.size() > 1 ? " were" : "was"); + return getOxfordCommaList(observed) + ". " + exclusion; } } + + } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktTextEnglish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktTextEnglish.java index 5bf3c37..647424c 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktTextEnglish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktTextEnglish.java @@ -6,14 +6,14 @@ public class PpktTextEnglish implements PhenopacketTextGenerator { @Override public String QUERY_HEADER() { return """ -I am running an experiment on a clinical case report to see how your diagnoses compare with those of human experts. 
I am going to give you part of a medical case. You are not trying to treat any patients. In this case, you are “Dr. GPT-4,” an AI language model who is providing a diagnosis Here are some guidelines. First, there is a single definitive diagnosis, and it is a diagnosis that is known today to exist in humans. The diagnosis is almost always confirmed by some sort of genetic test, though in rare cases when such a test does not exist for a diagnosis the diagnosis can instead be made using validated clinical criteria or very rarely just confirmed by expert opinion. After you read the case, I want you to give a differential diagnosis with a list of candidate diagnoses ranked by probability starting with the most likely candidate. Each candidate should be specified with the OMIM identifier and disease name. For instance, if the first candidate is Branchiooculofacial syndrome and the second is Cystic fibrosis, provide this: +I am running an experiment on a clinical case report to see how your diagnoses compare with those of human experts. I am going to give you part of a medical case. You are not trying to treat any patients. In this case, you are “Dr. GPT-4”, an AI language model who is providing a diagnosis. Here are some guidelines. First, there is a single definitive diagnosis, and it is a diagnosis that is known today to exist in humans. The diagnosis is almost always confirmed by some sort of genetic test, though in rare cases when such a test does not exist for a diagnosis the diagnosis can instead be made using validated clinical criteria or very rarely just confirmed by expert opinion. After you read the case, I want you to give a differential diagnosis with a list of candidate diagnoses ranked by probability starting with the most likely candidate. Each candidate should be specified with disease name. For instance, if the first candidate is Branchiooculofacial syndrome and the second is Cystic fibrosis, provide this: -1. 
OMIM:113620 - Branchiooculofacial syndrome -2. OMIM:219700 - Cystic fibrosis +1. Branchiooculofacial syndrome +2. Cystic fibrosis This list should provide as many diagnoses as you think are reasonable. -You do not need to explain your reasoning, just list the diagnoses together with the OMIM identifiers. +You do not need to explain your reasoning, just list the diagnoses. Here is the case: """; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java new file mode 100644 index 0000000..25c2ace --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGenerator.java @@ -0,0 +1,66 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.german; + +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketTextGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator; + +import java.util.List; +import java.util.Set; + +public class GermanPromptGenerator implements PromptGenerator { + + private final PPKtIndividualInfoGenerator ppktAgeSexGenerator; + + private final PhenopacketTextGenerator ppktTextGenerator; + + private final PpktPhenotypicFeatureGenerator ppktPhenotypicFeatureGenerator; + + + + public GermanPromptGenerator(PpktPhenotypicFeatureGenerator pfgen) { + ppktAgeSexGenerator = new PpktIndividualGerman(); + 
ppktTextGenerator = new PpktTextGerman(); + this.ppktPhenotypicFeatureGenerator = pfgen; + } + + + + + @Override + public String queryHeader() { + return ppktTextGenerator.QUERY_HEADER(); + } + + @Override + public String getIndividualInformation(PpktIndividual ppktIndividual) { + return this.ppktAgeSexGenerator.getIndividualDescription(ppktIndividual); + } + + @Override + public String formatFeatures(List ontologyTerms) { + return ppktPhenotypicFeatureGenerator.formatFeatures(ontologyTerms); + } + + @Override + public String getVignetteAtAge(PhenopacketAge page, PhenopacketSex psex, List terms) { + String ageString = this.ppktAgeSexGenerator.atAge(page); + String features = formatFeatures(terms); + return String.format("%s, präsentierte %s mit den folgenden Symptomen: %s", ageString, ppktAgeSexGenerator.heSheIndividual(psex), features); + } + + + @Override + public Set getMissingTranslations() { + return this.ppktPhenotypicFeatureGenerator.getMissingTranslations(); + } + + + + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java new file mode 100644 index 0000000..8b134ce --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java @@ -0,0 +1,510 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.german; + +import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.monarchinitiative.phenopacket2prompt.model.*; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +public class PpktIndividualGerman implements PPKtIndividualInfoGenerator { + + + private static final String FEMALE_INFANT = "ein weiblicher Säugling"; + private static final String MALE_INFANT = "ein männlicher Säugling"; + private static 
final String INFANT = "ein Säugling"; + + private static final String FEMALE_FETUS = "ein weiblicher Fet"; + private static final String MALE_FETUS = "ein männlicher Fet"; + private static final String FETUS = "ein Fet"; + + private static final String FEMALE_CHILD = "Mädchen"; + private static final String MALE_CHILD = "Junge"; + private static final String CHILD = "Kind"; + + private static final String FEMALE_ADULT = "Frau"; + private static final String MALE_ADULT = "Mann"; + private static final String ADULT = "erwachsene Person unbekannten Geschlechtes"; + /** + * Equivalent of "The clinical + * @param individual + * @return + */ + public String ageAndSexAtOnset(PpktIndividual individual) { + Optional ageOpt = individual.getAgeAtOnset(); + return ""; + } + + + + + public String ageAndSexAtLastExamination(PpktIndividual individual) { + PhenopacketSex psex = individual.getSex(); + Optional ageOpt = individual.getAgeAtLastExamination(); + if (ageOpt.isEmpty()) { + ageOpt = individual.getAgeAtOnset(); + } + String sex; + switch (psex) { + case FEMALE -> sex = FEMALE_ADULT; + case MALE -> sex = MALE_ADULT; + default -> sex = ADULT; + } + + if (ageOpt.isEmpty()) { + return sex; + } + PhenopacketAge age = ageOpt.get(); + if (age.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoage = (Iso8601Age) age; + int y = isoage.getYears(); + int m = isoage.getMonths(); + int d = isoage.getDays(); + if (psex.equals(PhenopacketSex.FEMALE)) { + if (y > 17) { + return String.format("Eine %djährige Patientin", y); + } else if (y > 9) { + return String.format("Eine %djährige Jugendliche", y); + } else if (y > 0) { + return String.format("Ein %djähriges Mädchen", y); + } else if (m>0) { + return String.format("Ein %d Monate alter weiblicher Säugling", m); + } else { + return String.format("Ein %d Tage alter weiblicher Säugling", d); + } + } + } else { + // age is an HPO onset term, we do not have an exact date + } + if (age.isChild()) { + return switch (psex) 
{ + case FEMALE -> FEMALE_CHILD; + case MALE -> MALE_CHILD; + default -> CHILD; + }; + } else if (age.isCongenital()) { + return switch (psex) { + case FEMALE -> "ein weibliches Neugeborenes"; + case MALE -> "ein männliches Neugeborenes"; + default -> "ein Neugeborenes"; + }; + } else if (age.isFetus()) { + return switch (psex) { + case FEMALE -> "ein weiblicher Fet"; + case MALE -> "ein männlicher Fet"; + default -> "ein Fet"; + }; + } else if (age.isInfant()) { + return switch (psex) { + case FEMALE -> FEMALE_INFANT; + case MALE -> MALE_INFANT; + default -> INFANT; + }; + } else { + return switch (psex) { + case FEMALE -> FEMALE_ADULT; + case MALE -> MALE_ADULT; + default -> ADULT; + }; + } + } + + + private String imAlterVonIsoAgeExact(PhenopacketAge ppktAge) { + Iso8601Age iso8601Age = (Iso8601Age) ppktAge; + int y = iso8601Age.getYears(); + int m = iso8601Age.getMonths(); + int d = iso8601Age.getDays(); + + if (y > 10) { + return String.format("Im Alter von %d Jahren", y); + } else if (y > 0) { + if (m > 0) { + return String.format("Im Alter von %d %s und %d %s", y, + y>1?"Jahren" : "Jahr", + m, m>1?"Monaten" : "Monat"); + } else { + return String.format("Im Alter von %d %s", y, y>1?"Jahren" : "Jahr"); + } + } + if (m>0) { + return String.format("Im Alter von %d %s y %d %s", m, m>1?"Monaten" : "Monat", + d, d>1?"Tagen" : "Tag"); + } else { + return String.format("%d Tage", d); + } + } + + + @Override + public String getIndividualDescription(PpktIndividual individual) { + if (individual.annotationCount() == 0) { + throw new PhenolRuntimeException("No HPO annotations"); + } + Optional lastExamOpt = individual.getAgeAtLastExamination(); + Optional onsetOpt = individual.getAgeAtOnset(); + PhenopacketSex psex = individual.getSex(); + if (lastExamOpt.isPresent() && onsetOpt.isPresent()) { + return onsetAndLastEncounterAvailable(psex, lastExamOpt.get(), onsetOpt.get()); + } else if (lastExamOpt.isPresent()) { + return latestEncounterAvailable(psex, 
lastExamOpt.get()); + } else if (onsetOpt.isPresent()) { + return onsetAvailable(psex, onsetOpt.get()); + } else { + return ageNotAvailable(psex); + } + } + + + private String iso8601ToYearMonth(Iso8601Age iso8601Age, PhenopacketSex psex) { + int y = iso8601Age.getYears(); + int m = iso8601Age.getMonths(); + int d = iso8601Age.getDays(); + if (psex.equals(PhenopacketSex.MALE)) { + if (iso8601Age.getMonths() == 0) { + return String.format("ein %djähriger Junge", y); + } else { + return String.format("ein %d %s, %d %s alter Junge", y, y>1?"Jahre":"Jahr", m, m>1?"Monate":"Monat"); + } + } else if (psex.equals(PhenopacketSex.FEMALE)) { + if (iso8601Age.getMonths() == 0) { + return String.format("ein %djähriges Mädchen", y); + } else { + return String.format("ein %d %s, %d %s altes Mädchen", y, y>1?"Jahre":"Jahr", m, m>1?"Monate":"Monat"); + } + } + if (iso8601Age.getMonths() == 0) { + return String.format("ein %djähriges Kind", y); + } else { + return String.format("ein %d %s, %d %s altes Kind", y, y>1?"Jahre":"Jahr", m, m>1?"Monate":"Monat"); } + } + + private String monthString(int m) { + return m>1 ? "Monate": "Monat"; + } + + private String dayString(int d) { + return d>1 ? "Tage": "Tag"; + } + + private String iso8601ToMonthDay(Iso8601Age iso8601Age) { + int m = iso8601Age.getMonths(); + int d = iso8601Age.getDays(); + if (m == 0) { + return String.format("de %d dias", d); + } else if (d>0){ + return String.format("%d %s und %d %s", m, monthString(m), d, dayString(d)); + } else { + return String.format("%d %s", m, m>1 ? "Monate": "Monat"); + } + } + + /** + * Create a phrase such as "at the age of 7 years, 4 months, and 2 days" + * Leave out the months and days if they are zero. 
+ * @param isoAge + * @return + */ + private String iso8601AtAgeOf(Iso8601Age isoAge) { + List components = new ArrayList<>(); + + if (isoAge.getYears()>1) { + components.add(String.format("%d Jahren", isoAge.getYears())); + } else if (isoAge.getYears() == 1) { + components.add("einem Jahr"); + } + if (isoAge.getMonths() > 1) { + components.add(String.format("%d Monaten", isoAge.getMonths())); + } else if (isoAge.getMonths() == 1) { + components.add("einem Monat"); + } + if (isoAge.getDays()>1) { + components.add(String.format("%d Tagen", isoAge.getDays())); + } else if (isoAge.getDays()==1) { + components.add("einem Tag"); + } + if (components.isEmpty()) { + return "bei der Geburt"; + } else if (components.size() == 1) { + return "im Alter von " + components.getFirst(); + } else if (components.size() == 2) { + return "im Alter von " + components.get(0) + " und " + components.get(1); + } else { + return "im Alter von " + components.get(0) + ", " + components.get(1) + + " und " + components.get(2); + } + } + + private String onsetTermAtAgeOf(HpoOnsetAge hpoOnsetTermAge) { + if (hpoOnsetTermAge.isFetus()) { + return "in der Fetalperiode"; + } else if (hpoOnsetTermAge.isCongenital()) { + return "bei der Geburt"; + } else if (hpoOnsetTermAge.isInfant()) { + return "im Säuglingsalter"; + } else if (hpoOnsetTermAge.isChild()) { + return "in der Kindheit"; + } else if (hpoOnsetTermAge.isJuvenile()) { + return "als Jugendlich adolescente"; + } else { + return "im Erwachsenenalter"; + } + } + + + private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8601Age) { + int y = iso8601Age.getYears(); + int m = iso8601Age.getMonths(); + int d = iso8601Age.getDays(); + // if older + if (y>17) { + return switch (psex) { + case FEMALE -> String.format("Die Patientin war eine %d-jährige Frau", y); + case MALE -> String.format("Der Patient war ein %d-jähriger Mann", y); + default -> String.format("Der Patient war eine %d-jährige Person", y); + }; + } else if 
(y>9) { + return switch (psex) { + case FEMALE -> String.format("Die Patientin war eine %d-jährige Jugendliche", y); + case MALE -> String.format("Der Patient war ein %d-jähriger Jugendlicher", y); + default -> String.format("Der Patient war ein %d-jähriger Jugendlicher", y); + }; + } else if (y>0) { + return switch (psex) { + case FEMALE -> String.format("Die Patientin war %s", iso8601ToYearMonth(iso8601Age, psex)); + case MALE -> + String.format("Der Patient war %s", iso8601ToYearMonth(iso8601Age, psex)); + default -> String.format("Der Patient war %s", iso8601ToYearMonth(iso8601Age, psex)); + }; + } else if (m>0 || d> 0) { + return switch (psex) { + case FEMALE -> String.format("Die Patientin war ein %s alter weiblicher Säugling", iso8601ToMonthDay(iso8601Age)); + case MALE -> String.format("Der Patient war ein %s alter Säugling", iso8601ToMonthDay(iso8601Age)); + default -> String.format("Der Patient war ein %s alter Säugling", iso8601ToMonthDay(iso8601Age)); + }; + } else { + return switch (psex) { + case FEMALE -> "Die Patientin war ein weibliches Neugeborenes"; + case MALE -> "Der Patient war ein männliches Neugeborenes"; + default -> "Der Patient war ein Neugeborenes"; + }; + } + } + + private String hpoOnsetIndividualDescription(PhenopacketSex psex, HpoOnsetAge hpoOnsetTermAge) { + if (hpoOnsetTermAge.isFetus()) { + return switch (psex) { + case FEMALE -> FEMALE_FETUS; + case MALE -> MALE_FETUS; + default -> FETUS; + }; + } else if (hpoOnsetTermAge.isCongenital()) { + return switch (psex) { + case FEMALE -> "Die Patientin war ein weibliches Neugeborenes, das sich"; + case MALE -> "Der Patient war ein männliches Neugeborenes, das sich"; + default -> "Der Patient war ein Neugeborenes, das sich"; + }; + } else if (hpoOnsetTermAge.isInfant()) { + return switch (psex) { + case FEMALE -> "Die Patientin war ein weiblicher Säugling, der sich "; + case MALE -> "Der Patient war ein männlicher Säugling, der sich"; + default -> "Der Patient war ein Säugling, der 
sich"; + }; + } else if (hpoOnsetTermAge.isChild()) { + return switch (psex) { + case FEMALE -> "Die Patientin war ein Mädchen, das sich "; + case MALE -> "Der Patient war ein Junge, der sich"; + default -> "Der Patient war ein Kind, das sich"; + }; + } else if (hpoOnsetTermAge.isJuvenile()) { + return switch (psex) { + case FEMALE -> "Die Patientin war eine Jugendliche, die sich"; + case MALE -> "Der Patient war ein Jugendlicher, der sich"; + default -> "Der Patient war ein Jugendlicher, der sich"; + }; + }else { + return switch (psex) { + case FEMALE -> "Die Patientin war eine Frau, die sich"; + case MALE -> "Der Patient war ein Mann, der sich"; + default -> "Der Patient war eine erwachsene Person nicht angegebenen Geschlechtes, die sich"; + }; + } + } + + /** + * A sentence such as The proband was a 39-year old woman who presented at the age of 12 years with + * HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. This method returns the phrase that ends with "with" + * El sujeto era un niño de 1 año y 10 meses que se presentó como recién nacido con un filtrum largo. 
+ * @param psex + * @param lastExamAge + * @param onsetAge + * @return + */ + private String onsetAndLastEncounterAvailable(PhenopacketSex psex, PhenopacketAge lastExamAge, PhenopacketAge onsetAge) { + String individualDescription; + String onsetDescription; + if (lastExamAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoAge = (Iso8601Age) lastExamAge; + individualDescription = iso8601individualDescription(psex, isoAge); + } else if (lastExamAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) lastExamAge; + individualDescription = hpoOnsetIndividualDescription(psex,hpoOnsetTermAge); + } else { + // should never happen + throw new PhenolRuntimeException("Did not recognize last exam age type " + lastExamAge.ageType()); + } + if (onsetAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoAge = (Iso8601Age) onsetAge; + onsetDescription = iso8601AtAgeOf(isoAge); + } else if (onsetAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) onsetAge; + onsetDescription = onsetTermAtAgeOf(hpoOnsetTermAge); + } else { + // should never happen + throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); + } + return switch (psex) { + case FEMALE -> String.format("%s, die sich %s mit den folgenden Symptomen vorgestellt hat: ", individualDescription, onsetDescription); + default -> String.format("%s, der sich %s mit den folgenden Symptomen vorgestellt hat: ", individualDescription, onsetDescription); + }; + } + + + /** + * Age at last examination available but age of onset not available + * The proband was a 39-year old woman who presented with HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. 
+ * @param psex + * @param lastExamAge + */ + private String latestEncounterAvailable(PhenopacketSex psex, PhenopacketAge lastExamAge) { + String individualDescription; + if (lastExamAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoAge = (Iso8601Age) lastExamAge; + individualDescription = iso8601individualDescription(psex, isoAge); + } else if (lastExamAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) lastExamAge; + individualDescription = hpoOnsetIndividualDescription(psex,hpoOnsetTermAge); + } else { + // should never happen + throw new PhenolRuntimeException("Did not recognize last exam age type " + lastExamAge.ageType()); + } + if (psex.equals(PhenopacketSex.FEMALE)) { + return String.format("%s, die sich mit den folgenden Symptomen vorgestellt hat: ", individualDescription); + } else { + return String.format("%s, der sich mit den folgenden Symptomen vorgestellt hat: ", individualDescription); + } + + } + + /** + * Age at last examination not available but age of onset available + * The proband presented at the age of 12 years with HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. 
+ * @param psex + * @param onsetAge + * @return + */ + private String onsetAvailable(PhenopacketSex psex, PhenopacketAge onsetAge) { + String onsetDescription; + if (onsetAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoAge = (Iso8601Age) onsetAge; + onsetDescription = iso8601AtAgeOf(isoAge); + } else if (onsetAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) onsetAge; + onsetDescription = onsetTermAtAgeOf(hpoOnsetTermAge); + } else { + // should never happen + throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); + } + return String.format("Der Patient stellte sich %s mit den folgenden Symptomen vor: ", onsetDescription); + } + + private String ageNotAvailable(PhenopacketSex psex) { + return switch (psex) { + case FEMALE -> "Die Patientin stellte sich mit den folgenden Symptomen vor: "; + case MALE -> "Der Patient stellte sich mit den folgenden Symptomen vor: "; + default -> "Der Patient stellte sich mit den folgenden Symptomen vor: "; + }; + } + + @Override + public String heSheIndividual(PhenopacketSex psex) { + return switch (psex) { + case FEMALE -> "sie"; + case MALE -> "er"; + default -> "die Person"; + }; + } + + @Override + public String atAge(PhenopacketAge ppktAge) { + if (ppktAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + return imAlterVonIsoAgeExact(ppktAge); + } else if (ppktAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + String label = ppktAge.age(); // something like "Infantile onset" + return switch (label) { + case "Infantile onset" -> "Als Säugling"; + case "Childhood onset" -> "In der Kindheit"; + case "Neonatal onset" -> "In der neugeborenen Zeit"; + case "Congenital onset" -> "Zum Zeitpunkt der Geburt"; + case "Adult onset" -> "Im Erwachsenenalter"; + case "Juvenile onset" -> "Im Jugendlichenalter"; + default-> { + throw new PhenolRuntimeException("No German translation for " + label); + 
} + }; + } else { + return ""; // should never get here + } + } + + // @Override + public String ppktSex(PpktIndividual individual) { + PhenopacketSex psex = individual.getSex(); + Optional ageOpt = individual.getAgeAtLastExamination(); + if (ageOpt.isEmpty()) { + ageOpt = individual.getAgeAtOnset(); + } + if (ageOpt.isEmpty()) { + return switch (psex) { + case FEMALE -> "Frau"; + case MALE -> "Mann"; + default -> "Person"; + }; + } + PhenopacketAge age = ageOpt.get(); + if (age.isChild()) { + return switch (psex) { + case FEMALE -> "Mädchen"; + case MALE -> "Junge"; + default -> "Kind"; + }; + } else if (age.isCongenital()) { + return switch (psex) { + case FEMALE -> "weibliches Neugeborenes"; + case MALE -> "männliches Neugeborenes"; + default -> "Neugeborenes"; + }; + } else if (age.isFetus()) { + return switch (psex) { + case FEMALE -> FEMALE_FETUS; + case MALE -> MALE_FETUS; + default -> FETUS; + }; + } else if (age.isInfant()) { + return switch (psex) { + case FEMALE -> FEMALE_INFANT; + case MALE -> MALE_INFANT; + default -> INFANT; + }; + } else { + return switch (psex) { + case FEMALE -> "Frau"; + case MALE -> "Mann"; + default -> "Person"; + }; + } + } + + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktPhenotypicfeatureGerman.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktPhenotypicfeatureGerman.java new file mode 100644 index 0000000..c257aa8 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktPhenotypicfeatureGerman.java @@ -0,0 +1,78 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.german; + +import org.monarchinitiative.phenopacket2prompt.international.HpInternational; +import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; +import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; + +import java.util.*; + +public class PpktPhenotypicfeatureGerman implements 
PpktPhenotypicFeatureGenerator { + + private final HpInternational german; + private Set missingTranslations; + + + public PpktPhenotypicfeatureGerman(HpInternational international) { + german = international; + missingTranslations = new HashSet<>(); + } + + + private List getTranslations(List ontologyTerms) { + List labels = new ArrayList<>(); + for (var term: ontologyTerms) { + Optional opt = german.getLabel(term.getTid()); + if (opt.isPresent()) { + labels.add(opt.get()); + } else { + String missing = String.format(" %s (%s)", term.getLabel(), term.getTid().getValue()); + missingTranslations.add(missing); + } + } + return labels; + } + + + + private String getCommaList(List items) { + if (items.size() == 1) { + return items.getFirst(); + } + if (items.size() == 2) { + // no comma if we just have two items. + // one item will work with the below code + return String.join(" und ", items); + } + String symList = String.join(", ", items); + int jj = symList.lastIndexOf(", "); + String end = symList.substring(jj+2); + symList = symList.substring(0, jj) + " und " + end; + return symList; + } + + @Override + public String formatFeatures(List ontologyTerms) { + List observedTerms = getObservedFeatures(ontologyTerms); + List excludedTerms = getExcludedFeatures(ontologyTerms); + List observedLabels = getTranslations(observedTerms); + List excludedLabels = getTranslations(excludedTerms); + if (observedLabels.isEmpty() && excludedLabels.isEmpty()) { + return "keine phänotypischen Abnormalitäten"; // should never happen, actually! + } else if (excludedLabels.isEmpty()) { + return getCommaList(observedLabels) + ". "; + } else if (observedLabels.isEmpty()) { + if (excludedLabels.size() > 1) { + return String.format("%s wurden ausgeschlossen.", getCommaList(excludedLabels)); + } else { + return String.format("%s wurde ausgeschlossen.",excludedLabels.getFirst()); + } + } else { + String exclusion = String.format("Dagegen %s %s ausgeschlossen.", excludedLabels.size()>1? 
"wurden":"wurde", getCommaList(excludedLabels)); + return getCommaList(observedLabels) + ". " + exclusion; + } + } + + public Set getMissingTranslations() { + return missingTranslations; + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktTextGerman.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktTextGerman.java new file mode 100644 index 0000000..8c29c7b --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktTextGerman.java @@ -0,0 +1,24 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.german; + +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketTextGenerator; + +public class PpktTextGerman implements PhenopacketTextGenerator { + + @Override + public String QUERY_HEADER() { + return """ +Ich führe ein Experiment mit einem klinischen Fallbericht durch, um zu sehen, wie sich Ihre Diagnosen mit denen menschlicher Experten vergleichen lassen. Ich werde Ihnen einen Teil eines medizinischen Falles vorstellen. Sie versuchen nicht, irgendwelche Patienten zu behandeln. In diesem Fall sind Sie „Dr. GPT-4“, ein KI-Sprachmodell, das eine Diagnose liefert. Hier sind einige Richtlinien. Erstens gibt es eine einzige definitive Diagnose, und es ist eine Diagnose, von der heute bekannt ist, dass sie beim Menschen existiert. Die Diagnose wird fast immer durch einen Gentest bestätigt. In seltenen Fällen, in denen ein solcher Test für eine Diagnose nicht existiert, kann die Diagnose jedoch anhand validierter klinischer Kriterien gestellt oder in sehr seltenen Fällen einfach durch eine Expertenmeinung bestätigt werden. Nachdem Sie den Fall gelesen haben, möchte ich, dass Sie eine Differentialdiagnose mit einer Liste von Kandidatendiagnosen stellen, die nach Wahrscheinlichkeit geordnet sind, beginnend mit dem wahrscheinlichsten Kandidaten. Jeder Kandidat sollte mit dem Krankheitsnamen angegeben werden. 
Wenn es sich bei dem ersten Kandidaten beispielsweise um das Branchiookulofaziale Syndrom und bei dem zweiten um Mukoviszidose handelt, geben Sie Folgendes in englischer Sprache an: + +1. Branchiooculofacial syndrome +2. Cystic fibrosis + +Diese Liste sollte so viele Diagnosen enthalten, wie Sie für sinnvoll halten. + +Sie müssen Ihre Argumentation nicht erläutern, sondern nur die Diagnosen auflisten. +Ich habe Ihnen diese Anleitung auf English gegeben, aber ich bitte Sie, ihre Antwort ausschließlich auf English zu liefern. +Hier ist der Fall: + +"""; + } + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGenerator.java new file mode 100644 index 0000000..6016570 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGenerator.java @@ -0,0 +1,59 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.italian; + +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketTextGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator; + +import java.util.List; +import java.util.Set; + +public class ItalianPromptGenerator implements PromptGenerator { + + private final PPKtIndividualInfoGenerator ppktAgeSexGenerator; + + private final PhenopacketTextGenerator ppktTextGenerator; + + private final 
PpktPhenotypicFeatureGenerator ppktPhenotypicFeatureGenerator; + + + + public ItalianPromptGenerator(PpktPhenotypicFeatureGenerator pfgen) { + ppktAgeSexGenerator = new PpktIndividualItalian(); + ppktTextGenerator = new PpktTextItalian(); + this.ppktPhenotypicFeatureGenerator = pfgen; + } + + @Override + public String queryHeader() { + return ppktTextGenerator.QUERY_HEADER(); + } + + @Override + public String getIndividualInformation(PpktIndividual ppktIndividual) { + return this.ppktAgeSexGenerator.getIndividualDescription(ppktIndividual); + } + + @Override + public String formatFeatures(List ontologyTerms) { + return ppktPhenotypicFeatureGenerator.formatFeatures(ontologyTerms); + } + + @Override + public String getVignetteAtAge(PhenopacketAge page, PhenopacketSex psex, List terms) { + String ageString = this.ppktAgeSexGenerator.atAge(page); + String features = formatFeatures(terms); + return String.format("%s, %s è presentato %s", ageString, ppktAgeSexGenerator.heSheIndividual(psex), features); + } + + @Override + public Set getMissingTranslations() { + return this.ppktPhenotypicFeatureGenerator.getMissingTranslations(); + } + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalian.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalian.java new file mode 100644 index 0000000..c0868b4 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalian.java @@ -0,0 +1,484 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.italian; + +import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.monarchinitiative.phenopacket2prompt.model.*; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +public class PpktIndividualItalian implements PPKtIndividualInfoGenerator { + 
+ + private static final String FEMALE_FETUS = "un feto femmina"; + private static final String MALE_FETUS = "un feto maschio"; + private static final String FETUS = "un feto"; + + private static final String FEMALE_NEWBORN = "una neonata femmina"; + private static final String MALE_NEWBORN = "un neonato maschio"; + private static final String NEWBORN = "un neonato"; + + private static final String FEMALE_INFANT = "un'infante femmina"; + private static final String MALE_INFANT = "un infante maschio"; + private static final String INFANT = "un infante"; + + private static final String FEMALE_CHILD = "una bambina"; + private static final String MALE_CHILD = "un bambino"; + private static final String CHILD = "un bambino"; + + private static final String FEMALE_ADOLESCENT = "un'adolescente femmina"; + private static final String MALE_ADOLESCENT = "un adolescente maschio"; + private static final String ADOLESCENT = "un adolescente"; + + private static final String FEMALE_ADULT = "una donna"; + private static final String MALE_ADULT = "un uomo"; + private static final String ADULT = "una persona adulta"; + + + /** + * Equivalent of "The clinical + * @param individual + * @return + */ + public String ageAndSexAtOnset(PpktIndividual individual) { + Optional ageOpt = individual.getAgeAtOnset(); + return ""; + } + + + + + public String ageAndSexAtLastExamination(PpktIndividual individual) { + PhenopacketSex psex = individual.getSex(); + Optional ageOpt = individual.getAgeAtLastExamination(); + if (ageOpt.isEmpty()) { + ageOpt = individual.getAgeAtOnset(); + } + String sex; + switch (psex) { + case FEMALE -> sex = "una paziente femmina"; + case MALE -> sex = "un paziente maschio"; + default -> sex = "una persona"; + }; + + if (ageOpt.isEmpty()) { + return sex; + } + PhenopacketAge age = ageOpt.get(); + if (age.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoage = (Iso8601Age) age; + int y = isoage.getYears(); + int m = isoage.getMonths(); + int d = 
isoage.getDays(); + if (psex.equals(PhenopacketSex.FEMALE)) { + if (y > 17) { + return String.format("una donna di %d anni", y); + } else if (y > 9) { + return String.format("un'adolescente femmina di %d anni", y); + + } else if (y > 0) { + return String.format("una bambina di %d anni", y); + } else if (m > 0) { + return String.format("un'infante femmina di %d mesi", m); + } else { + return String.format("una neonata di %d giorni", d); + } + } + } else { + // age is an HPO onset term, we do not have an exact date + } + if (age.isChild()) { + return switch (psex) { + case FEMALE -> FEMALE_CHILD; + case MALE -> MALE_CHILD; + default -> CHILD; // difficult to be gender neutral + }; + } else if (age.isCongenital()) { + return switch (psex) { + case FEMALE -> FEMALE_NEWBORN; + case MALE -> MALE_NEWBORN; + default -> NEWBORN; + }; + } else if (age.isFetus()) { + return switch (psex) { + case FEMALE -> FEMALE_FETUS; + case MALE -> MALE_FETUS; + default -> FETUS; + }; + } else if (age.isInfant()) { + return switch (psex) { + case FEMALE -> FEMALE_INFANT; + case MALE -> MALE_INFANT; + default -> INFANT; + }; + } else { + return switch (psex) { + case FEMALE -> FEMALE_ADULT; + case MALE -> MALE_ADULT; + default -> ADULT; + }; + } + } + + + private String atIsoAgeExact(PhenopacketAge ppktAge) { + Iso8601Age iso8601Age = (Iso8601Age) ppktAge; + int y = iso8601Age.getYears(); + int m = iso8601Age.getMonths(); + int d = iso8601Age.getDays(); + + if (y > 10) { + return String.format("%d anni", y); + } else if (y > 0) { + if (m > 1) { + return String.format("%d anni e %d mesi", y, m); + } else if (m == 1) { + return String.format("%d anni e un mese", y); + } else { + return String.format("%d anni", y); + } + } else if (m>0) { + return String.format("%d mesi e %d giorni", m, d); + } else { + return String.format("%d giorni", d); + } + } + + + @Override + public String getIndividualDescription(PpktIndividual individual) { + if (individual.annotationCount() == 0) { + throw new 
PhenolRuntimeException("No HPO annotations"); + } + Optional lastExamOpt = individual.getAgeAtLastExamination(); + Optional onsetOpt = individual.getAgeAtOnset(); + PhenopacketSex psex = individual.getSex(); + if (lastExamOpt.isPresent() && onsetOpt.isPresent()) { + return onsetAndLastEncounterAvailable(psex, lastExamOpt.get(), onsetOpt.get()); + } else if (lastExamOpt.isPresent()) { + return lastEncounterAvailable(psex, lastExamOpt.get()); + } else if (onsetOpt.isPresent()) { + return onsetAvailable(psex, onsetOpt.get()); + } else { + return ageNotAvailable(psex); + } + } + + + private String iso8601ToYearMonth(Iso8601Age iso8601Age) { + if (iso8601Age.getMonths() == 0) { + return String.format("di %d anni", iso8601Age.getYears()); + } else { + return String.format("di %d anni e %d mesi", iso8601Age.getYears(), iso8601Age.getMonths()); + } + } + + private String iso8601ToMonthDay(Iso8601Age iso8601Age) { + int m = iso8601Age.getMonths(); + int d = iso8601Age.getDays(); + if (m == 0) { + return String.format("di %d giorni", d); + } else if (d>0){ + return String.format("di %d mesi e %d giorni", m, d); + } else { + return String.format("di %d mesi", m); + } + } + + /** + * Create a phrase such as "at the age of 7 years, 4 months, and 2 days" + * Leave out the months and days if they are zero. 
+ * @param isoAge + * @return + */ + private String iso8601AtAgeOf(Iso8601Age isoAge) { + List components = new ArrayList<>(); + + if (isoAge.getYears()>1) { + components.add(String.format("%d anni", isoAge.getYears())); + } else if (isoAge.getYears() == 1) { + components.add("1 anno"); + } + if (isoAge.getMonths() > 1) { + components.add(String.format("%d mesi", isoAge.getMonths())); + } else if (isoAge.getMonths() == 1) { + components.add("1 mese"); + } + if (isoAge.getDays()>1) { + components.add(String.format("%d giorni", isoAge.getDays())); + } else if (isoAge.getDays()==1) { + components.add("1 giorno"); + } + if (components.isEmpty()) { + return "nel periodo neonatale"; + } else if (components.size() == 1) { + return "all'età di " + components.get(0); + } else if (components.size() == 2) { + return "all'età di " + components.get(0) + " e " + components.get(1); + } else { + return "all'età di " + components.get(0) + ", " + components.get(1) + + " e " + components.get(2); + } + } + + private String onsetTermAtAgeOf(HpoOnsetAge hpoOnsetTermAge) { + if (hpoOnsetTermAge.isFetus()) { + return "nel periodo fetale"; + } else if (hpoOnsetTermAge.isCongenital()) { + return "alla nascita"; + } else if (hpoOnsetTermAge.isInfant()) { + return "nel periodo infantile"; // unsure, to be checked + } else if (hpoOnsetTermAge.isChild()) { + return "da bambino"; // check + } else if (hpoOnsetTermAge.isJuvenile()) { + return "nell'adolescenza"; + } else { + return "in età adulta"; + } + } + + + private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8601Age) { + int y = iso8601Age.getYears(); + int m = iso8601Age.getMonths(); + int d = iso8601Age.getDays(); + // if older + if (y>17) { + return switch (psex) { + case FEMALE -> String.format("una donna di %d anni", y); + case MALE -> String.format("un uomo di %d anni", y); + default -> String.format("una persona di %d anni", y); + }; + } else if (y>9) { + return switch (psex) { + case FEMALE -> 
String.format("un'adolescente femmina di %d anni", y); + case MALE -> String.format("un adolescente maschio di %d anni", y); + default -> String.format("un adolescente di %d anni", y); + }; + } else if (y>0) { + return switch (psex) { + case FEMALE -> String.format("bambina %s", iso8601ToYearMonth(iso8601Age)); + case MALE -> String.format("bambino %s", iso8601ToYearMonth(iso8601Age)); + default -> String.format("bambino %s", iso8601ToYearMonth(iso8601Age)); + }; + } else if (m>0 || d> 0) { + return switch (psex) { + case FEMALE -> String.format("%s %s", FEMALE_INFANT, iso8601ToMonthDay(iso8601Age)); + case MALE -> String.format("%s %s", MALE_INFANT, iso8601ToMonthDay(iso8601Age)); + default -> String.format("%s %s", INFANT, iso8601ToMonthDay(iso8601Age)); + }; + } else { + return switch (psex) { + case FEMALE -> FEMALE_NEWBORN; + case MALE -> MALE_NEWBORN; + default -> NEWBORN; + }; + } + } + + private String hpoOnsetIndividualDescription(PhenopacketSex psex, HpoOnsetAge hpoOnsetTermAge) { + if (hpoOnsetTermAge.isFetus()) { + return switch (psex) { + case FEMALE -> FEMALE_FETUS; + case MALE -> MALE_FETUS; + default -> FETUS; + }; + } else if (hpoOnsetTermAge.isCongenital()) { + return switch (psex) { + case FEMALE -> FEMALE_NEWBORN; + case MALE -> MALE_NEWBORN; + default -> NEWBORN; + }; + } else if (hpoOnsetTermAge.isInfant()) { + return switch (psex) { + case FEMALE -> FEMALE_INFANT; + case MALE -> MALE_INFANT; + default -> INFANT; + }; + } else if (hpoOnsetTermAge.isChild()) { + return switch (psex) { + case FEMALE -> FEMALE_CHILD; + case MALE -> MALE_CHILD; + default -> CHILD; + }; + } else if (hpoOnsetTermAge.isJuvenile()) { + return switch (psex) { + case FEMALE -> FEMALE_ADOLESCENT; + case MALE -> MALE_ADOLESCENT; + default -> ADOLESCENT; + }; + }else { + return switch (psex) { + case FEMALE -> FEMALE_ADULT; + case MALE -> MALE_ADULT; + default -> ADULT; + }; + } + } + + /** + * A sentence such as The proband was a 39-year old woman who presented at the age 
of 12 years with + * HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. This method returns the phrase that ends with "with" + * El sujeto era un niño de 1 año y 10 meses que se presentó como recién nacido con un filtrum largo. + * @param psex + * @param lastExamAge + * @param onsetAge + * @return + */ + private String onsetAndLastEncounterAvailable(PhenopacketSex psex, PhenopacketAge lastExamAge, PhenopacketAge onsetAge) { + String individualDescription; + String onsetDescription; + if (lastExamAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoAge = (Iso8601Age) lastExamAge; + individualDescription = iso8601individualDescription(psex, isoAge); + } else if (lastExamAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) lastExamAge; + individualDescription = hpoOnsetIndividualDescription(psex,hpoOnsetTermAge); + } else { + // should never happen + throw new PhenolRuntimeException("Did not recognize last exam age type " + lastExamAge.ageType()); + } + if (onsetAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoAge = (Iso8601Age) onsetAge; + onsetDescription = iso8601AtAgeOf(isoAge); + } else if (onsetAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) onsetAge; + onsetDescription = onsetTermAtAgeOf(hpoOnsetTermAge); + } else { + // should never happen + throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); + } + return String.format("Il soggetto era %s che si è presentato %s con", individualDescription, onsetDescription); + } + + + /** + * Age at last examination available but age of onset not available + * The proband was a 39-year old woman who presented with HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. 
+ * @param psex + * @param lastExamAge + */ + private String lastEncounterAvailable(PhenopacketSex psex, PhenopacketAge lastExamAge) { + String individualDescription; + if (lastExamAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoAge = (Iso8601Age) lastExamAge; + individualDescription = iso8601individualDescription(psex, isoAge); + } else if (lastExamAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) lastExamAge; + individualDescription = hpoOnsetIndividualDescription(psex,hpoOnsetTermAge); + } else { + // should never happen + throw new PhenolRuntimeException("Did not recognize last exam age type " + lastExamAge.ageType()); + } + return String.format("Il soggetto era %s che si è presentato ", individualDescription); + } + + /** + * Age at last examination not available but age of onset available + * The proband presented at the age of 12 years with HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. + * @param psex + * @param onsetAge + * @return + */ + private String onsetAvailable(PhenopacketSex psex, PhenopacketAge onsetAge) { + String onsetDescription; + if (onsetAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + Iso8601Age isoAge = (Iso8601Age) onsetAge; + onsetDescription = iso8601AtAgeOf(isoAge); + } else if (onsetAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) onsetAge; + onsetDescription = onsetTermAtAgeOf(hpoOnsetTermAge); + } else { + // should never happen + throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); + } + return String.format("Il soggetto si è presentato %s con", onsetDescription); + } + + private String ageNotAvailable(PhenopacketSex psex) { + return switch (psex) { + case FEMALE -> "Il soggetto era una femmina che si è presentata con"; + case MALE -> "Il soggetto era un maschio si è presentato con"; + default -> "Il soggetto si è presentato con"; 
+ }; + } + + @Override + public String heSheIndividual(PhenopacketSex psex) { + return switch (psex) { + case FEMALE -> "lei"; + case MALE -> "lui"; + default -> "il soggetto"; + }; + } + + @Override + public String atAge(PhenopacketAge ppktAge) { + if (ppktAge.ageType().equals(PhenopacketAgeType.ISO8601_AGE_TYPE)) { + return "All'età di " + atIsoAgeExact(ppktAge); + } else if (ppktAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { + String label = ppktAge.age(); // something like "Infantile onset" + return switch (label) { + case "Infantile onset" -> "Durante il periodo infantile"; + case "Childhood onset" -> "Durante l'infanzia"; + case "Neonatal onset" -> "Durante il periodo neonatale"; + case "Congenital onset" -> "Alla nascita"; + case "Adult onset" -> "Da adulto"; + default-> String.format("Durante il %s periodo", label.replace(" onset", "")); + }; + } else { + return ""; // should never get here + } + } + + // @Override + public String ppktSex(PpktIndividual individual) { + PhenopacketSex psex = individual.getSex(); + Optional ageOpt = individual.getAgeAtLastExamination(); + if (ageOpt.isEmpty()) { + ageOpt = individual.getAgeAtOnset(); + } + if (ageOpt.isEmpty()) { + return switch (psex) { + case FEMALE -> FEMALE_ADULT; + case MALE -> MALE_ADULT; + default -> ADULT; + }; + } + PhenopacketAge age = ageOpt.get();; + if (age.isChild()) { + return switch (psex) { + case FEMALE -> FEMALE_CHILD; + case MALE -> MALE_CHILD; + default -> CHILD; + }; + } else if (age.isCongenital()) { + return switch (psex) { + case FEMALE -> FEMALE_NEWBORN; + case MALE -> MALE_NEWBORN; + default -> NEWBORN; + }; + } else if (age.isFetus()) { + return switch (psex) { + case FEMALE -> FEMALE_FETUS; + case MALE -> MALE_FETUS; + default -> FETUS; + }; + } else if (age.isInfant()) { + return switch (psex) { + case FEMALE -> FEMALE_INFANT; + case MALE -> MALE_INFANT; + default -> INFANT; + }; + } else { + return switch (psex) { + case FEMALE -> FEMALE_ADULT; + case MALE -> 
MALE_ADULT; + default -> ADULT; + }; + } + } + + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktPhenotypicfeatureItalian.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktPhenotypicfeatureItalian.java new file mode 100644 index 0000000..1749941 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktPhenotypicfeatureItalian.java @@ -0,0 +1,96 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.italian; + +import org.monarchinitiative.phenopacket2prompt.international.HpInternational; +import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; +import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; + +import java.util.*; +import java.util.function.Predicate; + +public class PpktPhenotypicfeatureItalian implements PpktPhenotypicFeatureGenerator { + + private final HpInternational italian; + + private Set missingTranslations; + + + + public PpktPhenotypicfeatureItalian(HpInternational international) { + italian = international; + missingTranslations = new HashSet<>(); + } + + + + private List getTranslations(List ontologyTerms) { + List labels = new ArrayList<>(); + for (var term: ontologyTerms) { + Optional opt = italian.getLabel(term.getTid()); + if (opt.isPresent()) { + labels.add(opt.get()); + } else { + String missing = String.format(" %s (%s)", term.getLabel(), term.getTid().getValue()); + missingTranslations.add(missing); + } + } + return labels; + } + + + private final Set vowels = Set.of('A', 'E', 'I', 'O', 'U'); + + private String getOxfordCommaList(List items) { + if (items.size() == 1) { + return items.get(0); + } + if (items.size() == 2) { + // no comma if we just have two items. 
+ // one item will work with the below code + return String.join(" and ", items); + } + String symList = String.join(", ", items); + int jj = symList.lastIndexOf(", "); + if (jj > 0) { + String end = symList.substring(jj+2); + if (vowels.contains(end.charAt(0))) { + symList = symList.substring(0, jj) + " e " + end; + } else { + symList = symList.substring(0, jj) + " e " + end; + } + } + return symList; + } + + @Override + public String formatFeatures(List ontologyTerms) { + List observedTerms = ontologyTerms.stream() + .filter(Predicate.not(OntologyTerm::isExcluded)) + .toList(); + List observedLabels = getTranslations(observedTerms); + List excludedTerms = ontologyTerms.stream() + .filter(OntologyTerm::isExcluded).toList(); + List excludedLabels = getTranslations(excludedTerms); + if (observedLabels.isEmpty() && excludedLabels.isEmpty()) { + return "nessuna anomalia fenotipica"; // should never happen, actually! + } else if (excludedLabels.isEmpty()) { + return getOxfordCommaList(observedLabels) + ". 
"; + } else if (observedLabels.isEmpty()) { + if (excludedLabels.size() > 1) { + return String.format("E' stata esclusa la presenza dei seguenti sintomi: %s.", getOxfordCommaList(excludedLabels)); + } else { + return String.format("E' stata esclusa la presenza del seguente sintomo: %s.",excludedLabels.get(0)); + } + } else { + String exclusion; + if (excludedLabels.size() == 1) { + exclusion = String.format(" ed è stata esclusa la presenza di %s.", getOxfordCommaList(excludedLabels)); + } else { + exclusion = String.format(" ed è stata esclusa la presenza di %s.", getOxfordCommaList(excludedLabels)); + } + return getOxfordCommaList(observedLabels) + exclusion; + } + } + public Set getMissingTranslations() { + return missingTranslations; + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktTextItalian.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktTextItalian.java new file mode 100644 index 0000000..36e5a72 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktTextItalian.java @@ -0,0 +1,24 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.italian; + +import org.monarchinitiative.phenopacket2prompt.output.PhenopacketTextGenerator; + +public class PpktTextItalian implements PhenopacketTextGenerator { + + @Override + public String QUERY_HEADER() { + return """ +Sto conducendo un esperimento riguardo a un caso clinico per confrontare le tue diagnosi con quelle di esperti umani. Ti darò una parte di un caso medico. Non stai cercando di curare alcun paziente. In questo caso, sei il "Dr. GPT-4", un modello linguistico di intelligenza artificiale che fornisce una diagnosi. Ecco alcune linee guida. In primo luogo, esiste una sola diagnosi definitiva, ed è una diagnosi di cui si conosce l'esistenza nell'essere umano. 
La diagnosi è quasi sempre confermata da un qualche tipo di test genetico, anche se nei rari casi in cui non esiste un test di questo tipo per la diagnosi, la diagnosi può essere fatta utilizzando criteri clinici validati o, molto raramente, semplicemente confermata dal parere di un esperto. Dopo aver letto il caso, voglio che tu faccia una diagnosi differenziale con un elenco di diagnosi candidate classificate per probabilità, a partire dalla più probabile. Ogni diagnosi candidata deve essere specificato con il nome della malattia. Per esempio, se il primo candidato è la sindrome branchiooculofacciale e il secondo è la fibrosi cistica, fornisci quanto segue, in inglese: + +1. Branchiooculofacial syndrome +2. Cystic fibrosis + +L'elenco deve contenere il numero di diagnosi che ritieni ragionevole. + +Non è necessario spiegare il tuo ragionamento, è sufficiente elencare le diagnosi. +Ti sto fornendo queste istruzioni in italiano, ma voglio che tu fornisca la totalità delle tue risposte in inglese. 
+Ecco il caso: + +"""; + } + +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java index 5e5060b..060eaa0 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanish.java @@ -2,13 +2,38 @@ import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenopacket2prompt.model.*; -import org.monarchinitiative.phenopacket2prompt.output.PhenopacketIndividualInformationGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; import java.util.ArrayList; import java.util.List; import java.util.Optional; -public class PpktIndividualSpanish implements PhenopacketIndividualInformationGenerator { +public class PpktIndividualSpanish implements PPKtIndividualInfoGenerator { + + //TODO translate from ita to spanish and edit this file in order to actually use these + private static final String FEMALE_FETUS = "un feto femenino"; + private static final String MALE_FETUS = "un feto masculino"; + private static final String FETUS = "un feto"; + + private static final String FEMALE_NEWBORN = "una niña recién nacida"; // CHECK + private static final String MALE_NEWBORN = "un neonato maschio"; + private static final String NEWBORN = "un neonato"; + + private static final String FEMALE_INFANT = "un bebé femenino"; + private static final String MALE_INFANT = "un bebé masculino"; + private static final String INFANT = "un bebé"; + + private static final String FEMALE_CHILD = "una niña"; + private static final String MALE_CHILD = "un niño"; + private static final String CHILD = "un niño"; + + private static final String FEMALE_ADOLESCENT = "un'adolescente femmina"; + private static final String 
MALE_ADOLESCENT = "un adolescente maschio"; + private static final String ADOLESCENT = "un adolescente"; + + private static final String FEMALE_ADULT = "una donna"; + private static final String MALE_ADULT = "un uomo"; + private static final String ADULT = "una persona adulta"; /** @@ -54,10 +79,10 @@ public String ageAndSexAtLastExamination(PpktIndividual individual) { } else if (y > 0) { return String.format("una niña de %d años", y); - } else if (m>0) { + } else if (m > 0) { return String.format("una bebe niña de %d meses", m); } else { - return String.format("una recien nacida %d meses", d); + return String.format("una recien nacida de %d dias de edad", d); } } } else { @@ -77,19 +102,19 @@ public String ageAndSexAtLastExamination(PpktIndividual individual) { }; } else if (age.isFetus()) { return switch (psex) { - case FEMALE -> "un feto femenino"; - case MALE -> "un feto masculino"; - default -> "un feto"; + case FEMALE -> FEMALE_FETUS; + case MALE -> MALE_FETUS; + default -> FETUS; }; } else if (age.isInfant()) { return switch (psex) { - case FEMALE -> "un bebé femenino"; - case MALE -> "un bebé masculino"; - default -> "un bebé"; + case FEMALE -> FEMALE_INFANT; + case MALE -> MALE_INFANT; + default -> INFANT; }; } else { return switch (psex) { - case FEMALE -> "un mujer"; + case FEMALE -> "una mujer"; case MALE -> "un hombre"; default -> "una persona adulta"; }; @@ -123,6 +148,9 @@ private String atIsoAgeExact(PhenopacketAge ppktAge) { @Override public String getIndividualDescription(PpktIndividual individual) { + if (individual.annotationCount() == 0) { + throw new PhenolRuntimeException("No HPO annotations"); + } Optional lastExamOpt = individual.getAgeAtLastExamination(); Optional onsetOpt = individual.getAgeAtOnset(); PhenopacketSex psex = individual.getSex(); @@ -183,9 +211,9 @@ private String iso8601AtAgeOf(Iso8601Age isoAge) { components.add("1 dia"); } if (components.isEmpty()) { - return "as a newborn"; + return "en el período neonatal"; } else if 
(components.size() == 1) { - return "at the age of " + components.get(0); + return "a la edad de " + components.get(0); } else if (components.size() == 2) { return "a la edad de " + components.get(0) + " y " + components.get(1); } else { @@ -194,13 +222,13 @@ private String iso8601AtAgeOf(Iso8601Age isoAge) { } } - private String onsetTermAtAgeOf(HpoOnsetAge hpoOnsetTermAge) { + private String onsetTermAtAgeOf(HpoOnsetAge hpoOnsetTermAge, PhenopacketSex psex) { if (hpoOnsetTermAge.isFetus()) { - return "en el periodo fetal"; + return "en el período fetal"; } else if (hpoOnsetTermAge.isCongenital()) { - return "en el periodo neonatal"; + return "al nacer"; } else if (hpoOnsetTermAge.isInfant()) { - return "como un bebe"; + return "en el primer año de vida"; } else if (hpoOnsetTermAge.isChild()) { return "en la niñez"; } else if (hpoOnsetTermAge.isJuvenile()) { @@ -218,9 +246,9 @@ private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8 // if older if (y>17) { return switch (psex) { - case FEMALE -> String.format("mujer de %d años", y); - case MALE -> String.format("hombre de %d años", y); - default -> String.format("persona de %d años", y); + case FEMALE -> String.format("una mujer de %d años", y); + case MALE -> String.format("un hombre de %d años", y); + default -> String.format("una persona de %d años", y); }; } else if (y>9) { return switch (psex) { @@ -230,15 +258,16 @@ private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8 }; } else if (y>0) { return switch (psex) { - case FEMALE -> String.format("niña %s", iso8601ToYearMonth(iso8601Age)); - case MALE -> String.format("niño %s", iso8601ToYearMonth(iso8601Age)); - default -> String.format("niño %s", iso8601ToYearMonth(iso8601Age)); + case FEMALE -> String.format("una niña %s", iso8601ToYearMonth(iso8601Age)); + case MALE -> String.format("un niño %s", iso8601ToYearMonth(iso8601Age)); + default -> String.format("un niño %s", iso8601ToYearMonth(iso8601Age)); }; } 
else if (m>0 || d> 0) { return switch (psex) { - case FEMALE -> String.format("una infante %s", iso8601ToMonthDay(iso8601Age)); - case MALE -> String.format("un infante %s", iso8601ToMonthDay(iso8601Age)); - default -> String.format("un infante %s", iso8601ToMonthDay(iso8601Age)); + // note that in Spanishm infante is up to 5 years + case FEMALE -> String.format("una bebé %s", iso8601ToMonthDay(iso8601Age)); + case MALE -> String.format("un bebé %s", iso8601ToMonthDay(iso8601Age)); + default -> String.format("un bebé %s", iso8601ToMonthDay(iso8601Age)); }; } else { return switch (psex) { @@ -252,39 +281,39 @@ private String iso8601individualDescription(PhenopacketSex psex, Iso8601Age iso8 private String hpoOnsetIndividualDescription(PhenopacketSex psex, HpoOnsetAge hpoOnsetTermAge) { if (hpoOnsetTermAge.isFetus()) { return switch (psex) { - case FEMALE -> "female fetus"; - case MALE -> "male fetus"; - default -> "fetus"; + case FEMALE -> FEMALE_FETUS; + case MALE -> MALE_FETUS; + default -> FETUS; }; } else if (hpoOnsetTermAge.isCongenital()) { return switch (psex) { - case FEMALE -> "female newborn"; - case MALE -> "male newborn"; - default -> "newborn"; + case FEMALE -> "una niña recién nacida"; + case MALE -> "un niño recién nacido"; + default -> "un bebe recién nacido"; }; } else if (hpoOnsetTermAge.isInfant()) { return switch (psex) { - case FEMALE -> "female infant"; - case MALE -> "male infant"; - default -> "infant"; + case FEMALE -> FEMALE_INFANT; + case MALE -> MALE_INFANT; + default -> INFANT; }; } else if (hpoOnsetTermAge.isChild()) { return switch (psex) { - case FEMALE -> "girl"; - case MALE -> "boy"; - default -> "child"; + case FEMALE -> "niña"; + case MALE -> "niño"; + default -> "niño"; }; } else if (hpoOnsetTermAge.isJuvenile()) { return switch (psex) { - case FEMALE -> "female adolescent"; - case MALE -> "male adolescent"; - default -> "adolescent"; + case FEMALE -> "una adolescente femenina"; + case MALE -> "un adolescente masculino"; + default 
-> "un adolescente"; }; }else { return switch (psex) { - case FEMALE -> "woman"; - case MALE -> "man"; - default -> "adult"; + case FEMALE -> "una mujer"; + case MALE -> "un hombre"; + default -> "un adulto"; }; } } @@ -316,15 +345,22 @@ private String onsetAndLastEncounterAvailable(PhenopacketSex psex, PhenopacketAg onsetDescription = iso8601AtAgeOf(isoAge); } else if (onsetAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) onsetAge; - onsetDescription = onsetTermAtAgeOf(hpoOnsetTermAge); + onsetDescription = onsetTermAtAgeOf(hpoOnsetTermAge, psex); } else { // should never happen throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); } - return String.format("El sujeto era %s que se presentó %s con", individualDescription, onsetDescription); + return switch (psex){ + case FEMALE -> String.format("La paciente era %s que se presentó %s con", individualDescription, onsetDescription); + case MALE -> String.format("El paciente era %s que se presentó %s con", individualDescription, onsetDescription); + default -> String.format("El paciente era %s que se presentó %s con", individualDescription, onsetDescription); + }; } + + + /** * Age at last examination available but age of onset not available * The proband was a 39-year old woman who presented with HPO1, HPO2, and HPO3. HPO4 and HPO5 were excluded. 
@@ -343,7 +379,11 @@ private String lastEncounterAvailable(PhenopacketSex psex, PhenopacketAge lastEx // should never happen throw new PhenolRuntimeException("Did not recognize last exam age type " + lastExamAge.ageType()); } - return String.format("El paciente era %s quien se presentó con", individualDescription); + if (psex.equals(PhenopacketSex.FEMALE)) { + return String.format("La paciente era %s que se presentó con", individualDescription); + } else { + return String.format("El paciente era %s qui se presentó con", individualDescription); + } } /** @@ -360,12 +400,12 @@ private String onsetAvailable(PhenopacketSex psex, PhenopacketAge onsetAge) { onsetDescription = iso8601AtAgeOf(isoAge); } else if (onsetAge.ageType().equals(PhenopacketAgeType.HPO_ONSET_AGE_TYPE)) { HpoOnsetAge hpoOnsetTermAge = (HpoOnsetAge) onsetAge; - onsetDescription = onsetTermAtAgeOf(hpoOnsetTermAge); + onsetDescription = onsetTermAtAgeOf(hpoOnsetTermAge, psex); } else { // should never happen throw new PhenolRuntimeException("Did not recognize onset age type " + onsetAge.ageType()); } - return String.format("The proband presented %s with", onsetDescription, onsetDescription); + return String.format("El paciente se presentó %s con", onsetDescription); } private String ageNotAvailable(PhenopacketSex psex) { @@ -379,9 +419,9 @@ private String ageNotAvailable(PhenopacketSex psex) { @Override public String heSheIndividual(PhenopacketSex psex) { return switch (psex) { - case FEMALE -> "el"; - case MALE -> "ella"; - default -> "la persona"; + case FEMALE -> "ella"; + case MALE -> "el"; + default -> "el individuo"; }; } @@ -413,9 +453,9 @@ public String ppktSex(PpktIndividual individual) { } if (ageOpt.isEmpty()) { return switch (psex) { - case FEMALE -> "female"; - case MALE -> "male"; - default -> "individual"; + case FEMALE -> "mujer"; + case MALE -> "hombre"; + default -> "individuo"; }; } PhenopacketAge age = ageOpt.get();; @@ -439,15 +479,15 @@ public String ppktSex(PpktIndividual 
individual) { }; } else if (age.isInfant()) { return switch (psex) { - case FEMALE -> "female infant"; - case MALE -> "male infant"; - default -> "infant"; + case FEMALE -> FEMALE_INFANT; + case MALE -> "un infante masculino"; + default -> "un infante"; }; } else { return switch (psex) { - case FEMALE -> "woman"; - case MALE -> "man"; - default -> "individual"; + case FEMALE -> "mujer"; + case MALE -> "hombre"; + default -> "adulto"; }; } } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java index 0be3121..629ccb0 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java @@ -1,13 +1,11 @@ package org.monarchinitiative.phenopacket2prompt.output.impl.spanish; +import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenopacket2prompt.international.HpInternational; import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; import org.monarchinitiative.phenopacket2prompt.output.PpktPhenotypicFeatureGenerator; -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; -import java.util.Set; +import java.util.*; import java.util.function.Predicate; public class PpktPhenotypicfeatureSpanish implements PpktPhenotypicFeatureGenerator { @@ -16,9 +14,10 @@ public class PpktPhenotypicfeatureSpanish implements PpktPhenotypicFeatureGenera public PpktPhenotypicfeatureSpanish(HpInternational international) { spanish = international; + missingTranslations = new HashSet<>(); } - + private Set missingTranslations; private List getTranslations(List ontologyTerms) { List labels = new ArrayList<>(); @@ -27,7 +26,8 @@ private List getTranslations(List ontologyTerms) 
{ if (opt.isPresent()) { labels.add(opt.get()); } else { - System.err.printf("[ERROR] Could not find %s translation for %s (%s).\n", spanish.getLanguageAcronym(), term.getLabel(), term.getTid().getValue()); + String missing = String.format(" %s (%s)", term.getLabel(), term.getTid().getValue()); + missingTranslations.add(missing); } } return labels; @@ -36,24 +36,39 @@ private List getTranslations(List ontologyTerms) { private final Set vowels = Set.of('A', 'E', 'I', 'O', 'U', 'Y'); + String getConnector(String nextWord) { + if (nextWord.length() < 2) { + return "y"; // should never happen but do not want to crash + } + Character letter = nextWord.charAt(0); + if (vowels.contains(letter)) { + return " i "; + } + Character letter2 = nextWord.charAt(1); + if (letter == 'H' && vowels.contains(letter2)) { + return " i "; + } + return " y "; + + } + + private String getOxfordCommaList(List items) { if (items.size() == 1) { - return items.get(0); + return items.getFirst(); } if (items.size() == 2) { // no comma if we just have two items. // one item will work with the below code - return String.join(" and ", items); + String connector = getConnector(items.get(1)); + return String.join(connector, items); } String symList = String.join(", ", items); int jj = symList.lastIndexOf(", "); if (jj > 0) { String end = symList.substring(jj+2); - if (vowels.contains(end.charAt(0))) { - symList = symList.substring(0, jj) + " i " + end; - } else { - symList = symList.substring(0, jj) + " y " + end; - } + String connector = getConnector(end); + symList = symList.substring(0, jj) + connector + end; } return symList; } @@ -68,23 +83,28 @@ public String formatFeatures(List ontologyTerms) { .filter(OntologyTerm::isExcluded).toList(); List excludedLabels = getTranslations(excludedTerms); if (observedLabels.isEmpty() && excludedLabels.isEmpty()) { - return "no phenotypic abnormalities"; // should never happen, actually! 
+ throw new PhenolRuntimeException("No phenotypic abnormalities"); // should never happen, actually! } else if (excludedLabels.isEmpty()) { return getOxfordCommaList(observedLabels) + ". "; } else if (observedLabels.isEmpty()) { if (excludedLabels.size() > 1) { - return String.format("por lo que se excluyeron %s.", getOxfordCommaList(excludedLabels)); + return String.format("se descartaron %s.", getOxfordCommaList(excludedLabels)); } else { - return String.format("por lo que %s fue excluido.",excludedLabels.get(0)); + return String.format("se descartó %s.",excludedLabels.getFirst()); } } else { String exclusion; if (excludedLabels.size() == 1) { - exclusion = String.format(" y se excluyó %s.", getOxfordCommaList(excludedLabels)); + exclusion = String.format(". En cambio, se descartó %s.", getOxfordCommaList(excludedLabels)); } else { - exclusion = String.format(" y se excluyeron %s.", getOxfordCommaList(excludedLabels)); + exclusion = String.format(". En cambio, se descartaron %s.", getOxfordCommaList(excludedLabels)); } return getOxfordCommaList(observedLabels) + exclusion; } } + + public Set getMissingTranslations() { + return missingTranslations; + } + } diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktTextSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktTextSpanish.java index c31542b..62dbea5 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktTextSpanish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktTextSpanish.java @@ -7,14 +7,15 @@ public class PpktTextSpanish implements PhenopacketTextGenerator { @Override public String QUERY_HEADER() { return """ -Estoy realizando un experimento con el informe de un caso clínico para comparar sus diagnósticos con los de expertos humanos. Les voy a dar parte de un caso médico. No estás intentando tratar a ningún paciente. En este caso, usted es el “Dr. 
GPT-4”, un modelo de lenguaje de IA que proporciona un diagnóstico. Aquí hay algunas pautas. En primer lugar, existe un único diagnóstico definitivo, y es un diagnóstico que hoy se sabe que existe en humanos. El diagnóstico casi siempre se confirma mediante algún tipo de prueba genética, aunque en casos raros cuando no existe dicha prueba para un diagnóstico, el diagnóstico puede realizarse utilizando criterios clínicos validados o, muy raramente, simplemente confirmado por la opinión de un experto. Después de leer el caso, quiero que haga un diagnóstico diferencial con una lista de diagnósticos candidatos clasificados por probabilidad comenzando con el candidato más probable. Cada candidato debe especificarse con el identificador OMIM y el nombre de la enfermedad. Por ejemplo, si el primer candidato es el síndrome branquiooculofacial y el segundo es la fibrosis quística, proporcione lo siguiente: +Estoy realizando un experimento con el informe de un caso clínico para comparar sus diagnósticos con los de expertos humanos. Les voy a dar parte de un caso médico. No estás intentando tratar a ningún paciente. En este caso, usted es el “Dr. GPT-4”, un modelo de lenguaje de IA que proporciona un diagnóstico. Aquí hay algunas pautas. En primer lugar, existe un único diagnóstico definitivo, y es un diagnóstico que hoy se sabe que existe en humanos. El diagnóstico casi siempre se confirma mediante algún tipo de prueba genética, aunque en casos raros cuando no existe dicha prueba para un diagnóstico, el diagnóstico puede realizarse utilizando criterios clínicos validados o, muy raramente, simplemente confirmado por la opinión de un experto. Después de leer el caso, quiero que haga un diagnóstico diferencial con una lista de diagnósticos candidatos clasificados por probabilidad comenzando con el candidato más probable. Cada candidato debe especificarse con el nombre de la enfermedad. 
Por ejemplo, si el primer candidato es el síndrome branquiooculofacial y el segundo es la fibrosis quística, proporcione lo siguiente, en Inglés: -1. OMIM:113620 - Síndrome branquiooculofacial -2. OMIM:219700 - Fibrosis quística +1. Branchiooculofacial syndrome +2. Cystic fibrosis Esta lista debe proporcionar tantos diagnósticos como considere razonables. -No es necesario que explique su razonamiento, simplemente enumere los diagnósticos junto con los identificadores OMIM. +No es necesario que explique su razonamiento, simplemente enumere los diagnósticos. +Te estoy dando estas instrucciones en Español pero quiero que proveas todas tus respuestas en Inglés. Este es el caso: """; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java index ee48aee..39ddb33 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptGenerator.java @@ -11,10 +11,8 @@ public class SpanishPromptGenerator implements PromptGenerator { - private final Ontology hpo; - - private final PhenopacketIndividualInformationGenerator ppktAgeSexGenerator; + private final PPKtIndividualInfoGenerator ppktAgeSexGenerator; private final PhenopacketTextGenerator ppktTextGenerator; @@ -22,8 +20,7 @@ public class SpanishPromptGenerator implements PromptGenerator { - public SpanishPromptGenerator(Ontology hpo, PpktPhenotypicFeatureGenerator pfgen) { - this.hpo = hpo; + public SpanishPromptGenerator(PpktPhenotypicFeatureGenerator pfgen) { ppktAgeSexGenerator = new PpktIndividualSpanish(); ppktTextGenerator = new PpktTextSpanish(); this.ppktPhenotypicFeatureGenerator = pfgen; @@ -53,7 +50,10 @@ public String getVignetteAtAge(PhenopacketAge page, PhenopacketSex psex, List getMissingTranslations() { + 
return this.ppktPhenotypicFeatureGenerator.getMissingTranslations(); + } diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividualTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividualTest.java index efbb8b6..0bc9242 100644 --- a/src/test/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividualTest.java +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/model/PpktIndividualTest.java @@ -22,7 +22,7 @@ public class PpktIndividualTest { private static final ClassLoader classLoader = PpktIndividualTest.class.getClassLoader(); private static final URL resource = (classLoader.getResource(ppktPath)); private static final File file = new File(resource.getFile()); - private static final PpktIndividual ppktIndividual = new PpktIndividual(file); + private static final PpktIndividual ppktIndividual = PpktIndividual.fromFile(file); @Test diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java new file mode 100644 index 0000000..258e143 --- /dev/null +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/PPKtIndividualBase.java @@ -0,0 +1,152 @@ +package org.monarchinitiative.phenopacket2prompt.output; + +import org.monarchinitiative.phenopacket2prompt.model.Iso8601Age; +import org.monarchinitiative.phenopacket2prompt.model.HpoOnsetAge; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.phenopackets.phenopackettools.builder.PhenopacketBuilder; +import org.phenopackets.phenopackettools.builder.builders.*; +import org.phenopackets.schema.v2.core.Disease; +import org.phenopackets.schema.v2.core.Individual; +import org.phenopackets.schema.v2.core.MetaData; +import 
org.phenopackets.schema.v2.core.PhenotypicFeature; + +import java.util.List; +import java.util.function.Supplier; + +public class PPKtIndividualBase { + private final static MetaData metadata = MetaDataBuilder.builder("curator").build(); + + + private final static PhenotypicFeature atrophy = PhenotypicFeatureBuilder.builder("HP:0001272", "Cerebellar atrophy" ).infantileOnset().build(); + private final static PhenotypicFeature ataxia = PhenotypicFeatureBuilder.builder("HP:0001251", "Ataxia").infantileOnset().build(); + private final static PhenotypicFeature bradyphrenExcluded = PhenotypicFeatureBuilder.builder("HP:0031843", "Bradyphrenia").excluded().build(); + private final static PhenotypicFeature polydactyly = PhenotypicFeatureBuilder.builder("HP:0100259", "Postaxial polydactyly").congenitalOnset().build(); + private final static PhenotypicFeature hepatomegalyNoOnset = PhenotypicFeatureBuilder.builder("HP:0002240","Hepatomegaly").build(); + private final static PhenotypicFeature lymphopenia = PhenotypicFeatureBuilder.builder("HP:0001888","Lymphopenia").iso8601onset("P3D").build(); + private final static PhenotypicFeature pneumonia = PhenotypicFeatureBuilder.builder("HP:0002090","Pneumonia").iso8601onset("P3D").build(); + private final static PhenotypicFeature igA = PhenotypicFeatureBuilder.builder("HP:0002720","Decreased circulating IgA level").iso8601onset("P3D").build(); + private final static PhenotypicFeature igM = PhenotypicFeatureBuilder.builder("HP:0002850","Decreased circulating total IgM").iso8601onset("P2Y").build(); + + + public sealed interface TestOutcome { + record Ok(String value) implements TestOutcome {} + record Error(Supplier exceptionSupplier) implements TestOutcome {} + } + + public record TestIndividual(String description, PpktIndividual ppktIndividual, TestOutcome expectedOutcome) {} + + public record TestIdvlHeShe(String description, PhenopacketSex ppktSex, TestOutcome expectedOutcome) {} + + + public record TestIdvlAtAge(String 
description, PhenopacketAge ppktAge, TestOutcome expectedOutcome) {} + + + + + public static PpktIndividual female46yearsInfantileOnset() { + PhenopacketBuilder builder = PhenopacketBuilder.create("id1", metadata); + Disease d = DiseaseBuilder.builder("OMIM:100123", "test").onset(TimeElements.infantileOnset()).build(); + Individual subject = IndividualBuilder.builder("individual.1").female().ageAtLastEncounter("P46Y").build(); + builder.individual(subject).addDisease(d).addPhenotypicFeature(atrophy).addPhenotypicFeature(ataxia).addPhenotypicFeature(bradyphrenExcluded); + return new PpktIndividual(builder.build()); + } + + public static PpktIndividual male4monthsCongenitalOnset() { + PhenopacketBuilder builder = PhenopacketBuilder.create("id2", metadata); + Disease d = DiseaseBuilder.builder("OMIM:100123", "test").onset(TimeElements.congenitalOnset()).build(); + Individual subject = IndividualBuilder.builder("individual.2").male().ageAtLastEncounter("P4M").build(); + builder.individual(subject).addDisease(d).addPhenotypicFeature(polydactyly); + return new PpktIndividual(builder.build()); + } + + public static PpktIndividual femaleNoAge() { + PhenopacketBuilder builder = PhenopacketBuilder.create("id3", metadata); + Disease d = DiseaseBuilder.builder("OMIM:100123", "test").build(); + Individual subject = IndividualBuilder.builder("individual.3").female().build(); + builder.individual(subject).addDisease(d).addPhenotypicFeature(hepatomegalyNoOnset); + return new PpktIndividual(builder.build()); + } + + /** + * Invalid phenopacket because no HPO annotationsa + * @return + */ + public static PpktIndividual femaleNoHPOs() { + PhenopacketBuilder builder = PhenopacketBuilder.create("id4", metadata); + Disease d = DiseaseBuilder.builder("OMIM:100123", "test").build(); + Individual subject = IndividualBuilder.builder("individual.4").female().build(); + return new PpktIndividual(builder.build()); + } + + public static PpktIndividual unknownSex4YearsOnset() { + 
PhenopacketBuilder builder = PhenopacketBuilder.create("id5", metadata); + Disease d = DiseaseBuilder.builder("OMIM:100123", "test").onset(TimeElements.childhoodOnset()).build(); + Individual subject = IndividualBuilder.builder("individual.5").unknownSex().build(); + builder.individual(subject).addDisease(d).addPhenotypicFeature(hepatomegalyNoOnset); + return new PpktIndividual(builder.build()); + } + + +/* + + +Der Proband war niño de 2 años, der sich im Alter von 3 Tagen mit den folgenden Symptomen vorgestellt hat: +Lymphopenia, Pneumonia und Severe combined immunodeficiency. im Alter von 1 Monate y 0 Tage, er presentó Decreased lymphocyte proliferation in response to mitogen, Decreased circulating IgA level und Decreased circulating total IgM. + */ + + public static PpktIndividual twoYears() { + PhenopacketBuilder builder = PhenopacketBuilder.create("id6", metadata); + Disease d = DiseaseBuilder.builder("OMIM:100123", "test").onset(TimeElements.age("P3D")).build(); + Individual subject = IndividualBuilder.builder("individual.6").male().ageAtLastEncounter("P2Y").build(); + var features = List.of(lymphopenia, pneumonia, igA, igM); + builder.individual(subject).addDisease(d).addPhenotypicFeatures(features); + return new PpktIndividual(builder.build()); + } + + public static PpktIndividual PMID_9312167_A() { + PhenopacketBuilder builder = PhenopacketBuilder.create("PMID_9312167_A:I:2", metadata); + Disease d = DiseaseBuilder.builder("OMIM:179800", "Distal renal tubular acidosis 1").build(); + Individual subject = IndividualBuilder.builder("A:I:2").female().ageAtLastEncounter("P40Y").build(); + var pf1 = PhenotypicFeatureBuilder.builder("HP:0000121","Nephrocalcinosis").build(); + var pf2 = PhenotypicFeatureBuilder.builder("HP:0002900","Hypokalemia").build(); + var pf3 = PhenotypicFeatureBuilder.builder("HP:0032944","Alkaline urine").build(); + var pf4 = PhenotypicFeatureBuilder.builder("HP:0012100","Abnormal circulating creatinine concentration").excluded().build(); 
+ var pf5 = PhenotypicFeatureBuilder.builder("HP:0008341","Distal renal tubular acidosis").excluded().build(); + var lst = List.of(pf1, pf2, pf3, pf4, pf5); + builder.individual(subject).addDisease(d).addPhenotypicFeatures(lst); + return new PpktIndividual(builder.build()); + } + + /* + El paciente era un hombre de 30 años qui se presentó con se descartaron Máculas hipomelanóticas, Rabdomioma cardíaco y Bradicardia.A la edad de 30 años, el presentó Fositas o muescas (pits) del esmalte dental y Fibromatosis gingival. + */ + public static PpktIndividual onlyExcludedAtPresentation() { + PhenopacketBuilder builder = PhenopacketBuilder.create("id8", metadata); + Disease d = DiseaseBuilder.builder("OMIM:100123", "test").onset(TimeElements.age("P3D")).build(); + Individual subject = IndividualBuilder.builder("individual.6").male().ageAtLastEncounter("P30Y").build(); + // HP:Gingival fibromatosis HP: + var pf1 = PhenotypicFeatureBuilder.builder("HP:0001662","Bradycardia").excluded().build(); + var pf2 = PhenotypicFeatureBuilder.builder("HP:0009729","Cardiac rhabdomyoma").excluded().build(); + var pf3 = PhenotypicFeatureBuilder.builder("HP:0009719","Hypomelanotic macule").excluded().build(); + var pf4 =PhenotypicFeatureBuilder.builder("HP:0009722","Dental enamel pits").onset(TimeElements.age("P30Y")).build(); + var pf5 =PhenotypicFeatureBuilder.builder("HP:0000169","Gingival fibromatosis").onset(TimeElements.age("P30Y")).build(); + var features = List.of(pf1,pf2,pf3,pf4,pf5); + builder.individual(subject).addDisease(d).addPhenotypicFeatures(features); + return new PpktIndividual(builder.build()); + } + + + + public static PhenopacketAge congenital = HpoOnsetAge.congenital(); + public static PhenopacketAge infantile = HpoOnsetAge.infantile(); + public static PhenopacketAge juvenile = HpoOnsetAge.juvenile(); + public static PhenopacketAge childhood = HpoOnsetAge.childhood(); + public static PhenopacketAge p46y = new Iso8601Age("P46Y"); + + + + + +} diff --git 
a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGeneratorTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGeneratorTest.java new file mode 100644 index 0000000..c28473a --- /dev/null +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/EnglishPromptGeneratorTest.java @@ -0,0 +1,10 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.english; + +public class EnglishPromptGeneratorTest { + + + + + + +} diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java new file mode 100644 index 0000000..533d9d0 --- /dev/null +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktIndividualEnglishTest.java @@ -0,0 +1,123 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.english; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; +import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase; + +import java.util.function.Supplier; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +public class PpktIndividualEnglishTest extends PPKtIndividualBase{ + + + + private static Stream testGetIndividualDescription() { + return Stream.of( + new TestIndividual("46 year olf female, infantile onset", + female46yearsInfantileOnset(), new TestOutcome.Ok("The proband was a 46-year old 
woman who presented as an infant with")), + new TestIndividual("male 4 months, congenital onset", + male4monthsCongenitalOnset(), new TestOutcome.Ok("The proband was a 4-month old male infant who presented at birth with")), + new TestIndividual("female, no onset", + femaleNoAge(), new TestOutcome.Ok("The proband was a female who presented with")), + new TestIndividual("female, no HPOs", + femaleNoHPOs(), new TestOutcome.Error(() -> new PhenolRuntimeException("No HPO annotations"))), + new TestIndividual("unknown sex, no 4yo", + unknownSex4YearsOnset(), new TestOutcome.Ok("The proband presented in childhood with")) + ); + } + + + + @ParameterizedTest + @MethodSource("testGetIndividualDescription") + void testEvaluateExpression(TestIndividual testCase) { + PPKtIndividualInfoGenerator generator = new PpktIndividualEnglish(); + PpktIndividual ppkti = testCase.ppktIndividual(); + switch (testCase.expectedOutcome()) { + case TestOutcome.Ok(String expectedResult) -> + assertEquals(expectedResult, generator.getIndividualDescription(ppkti), + "Incorrect evaluation for: " + testCase.description()); + case TestOutcome.Error(Supplier exceptionSupplier) -> + assertThrows(exceptionSupplier.get().getClass(), + () -> generator.getIndividualDescription(ppkti), + "Incorrect error handling for: " + testCase.description()); + } + } + + + + private static Stream testGetPPKtSex() { + return Stream.of( + new TestIdvlHeShe("female", + PhenopacketSex.FEMALE, new TestOutcome.Ok("she")), + new TestIdvlHeShe("male", + PhenopacketSex.MALE, new TestOutcome.Ok("he")), + new TestIdvlHeShe("proband", + PhenopacketSex.UNKNOWN, new TestOutcome.Ok("the individual")) + ); + } + + @ParameterizedTest + @MethodSource("testGetPPKtSex") + void testPPKtSex(TestIdvlHeShe testCase) { + PPKtIndividualInfoGenerator generator = new PpktIndividualEnglish(); + switch (testCase.expectedOutcome()) { + case TestOutcome.Ok(String expectedResult) -> + assertEquals(expectedResult, 
generator.heSheIndividual(testCase.ppktSex())); + case TestOutcome.Error(Supplier exceptionSupplier) -> + assertThrows(exceptionSupplier.get().getClass(), + () -> generator.heSheIndividual(testCase.ppktSex()), + "Incorrect error handling for: " + testCase.description()); + } + } + + + +//public record TestIdvlAtAge(String description, PhenopacketAge ppktAge, TestOutcome expectedOutcome) {} + + + + + private static Stream testIndlAtAge() { + return Stream.of( + new TestIdvlAtAge("congenital", + congenital, new TestOutcome.Ok("At birth")), + new TestIdvlAtAge("infantile", + infantile, new TestOutcome.Ok("During the infantile period")), + new TestIdvlAtAge("childhood age", + childhood, new TestOutcome.Ok("During childhood")), + new TestIdvlAtAge("46 years old", + p46y, new TestOutcome.Ok("At an age of 46 years")) + ); + } + + + @ParameterizedTest + @MethodSource("testIndlAtAge") + void testPPKtSex(TestIdvlAtAge testCase) { + PPKtIndividualInfoGenerator generator = new PpktIndividualEnglish(); + switch (testCase.expectedOutcome()) { + case TestOutcome.Ok(String expectedResult) -> + assertEquals(expectedResult, generator.atAge(testCase.ppktAge())); + case TestOutcome.Error(Supplier exceptionSupplier) -> + assertThrows(exceptionSupplier.get().getClass(), + () -> generator.atAge(testCase.ppktAge()), + "Incorrect error handling for: " + testCase.description()); + } + + + } + + + + + + +} diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglishTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglishTest.java new file mode 100644 index 0000000..33d8f37 --- /dev/null +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/english/PpktPhenotypicFeatureEnglishTest.java @@ -0,0 +1,48 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.english; + +import org.junit.jupiter.params.ParameterizedTest; +import 
org.junit.jupiter.params.provider.MethodSource; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase; +import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; + +import java.util.function.Supplier; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase.female46yearsInfantileOnset; +import static org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase.male4monthsCongenitalOnset; + +public class PpktPhenotypicFeatureEnglishTest { + + + + + private static Stream testGetIndividualPhenotypicFeatures() { + return Stream.of( + new PPKtIndividualBase.TestIndividual("46 year olf female, infantile onset", + female46yearsInfantileOnset(), new PPKtIndividualBase.TestOutcome.Ok("Cerebellar atrophy and Ataxia. ")), + new PPKtIndividualBase.TestIndividual("male 4 months, congenital onset", + male4monthsCongenitalOnset(), new PPKtIndividualBase.TestOutcome.Ok("Postaxial polydactyly. 
")) + ); + } + + @ParameterizedTest + @MethodSource("testGetIndividualPhenotypicFeatures") + void testEvaluateExpression(PPKtIndividualBase.TestIndividual testCase) { + PPKtIndividualInfoGenerator generator = new PpktIndividualEnglish(); + EnglishPromptGenerator gen = new EnglishPromptGenerator(); + PpktIndividual ppkti = testCase.ppktIndividual(); + switch (testCase.expectedOutcome()) { + case PPKtIndividualBase.TestOutcome.Ok(String expectedResult) -> + assertEquals(expectedResult, gen.formatFeatures(ppkti.getPhenotypicFeaturesAtOnset()), + "Incorrect evaluation for: " + testCase.description()); + case PPKtIndividualBase.TestOutcome.Error(Supplier exceptionSupplier) -> + assertThrows(exceptionSupplier.get().getClass(), + () -> generator.getIndividualDescription(ppkti), + "Incorrect error handling for: " + testCase.description()); + } + } + +} diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java new file mode 100644 index 0000000..67f5fce --- /dev/null +++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/GermanPromptGeneratorTest.java @@ -0,0 +1,61 @@ +package org.monarchinitiative.phenopacket2prompt.output.impl.german; + +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.monarchinitiative.phenol.base.PhenolRuntimeException; +import org.monarchinitiative.phenol.io.OntologyLoader; +import org.monarchinitiative.phenol.ontology.data.Ontology; +import org.monarchinitiative.phenopacket2prompt.international.HpInternational; +import org.monarchinitiative.phenopacket2prompt.international.HpInternationalOboParser; +import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; +import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator; + +import java.io.File; +import java.util.Map; + +import static 
org.junit.jupiter.api.Assertions.assertEquals;
+import static org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase.twoYears;
+
+
+/**
+ * Test only works with local copies of data/hp.json and data/hp-international.obo
+ */
+@Disabled
+public class GermanPromptGeneratorTest {
+
+    private static final String case_vignette = """
+Ich führe ein Experiment mit einem klinischen Fallbericht durch, um zu sehen, wie sich Ihre Diagnosen mit denen menschlicher Experten vergleichen lassen. Ich werde Ihnen einen Teil eines medizinischen Falles vorstellen. Sie versuchen nicht, irgendwelche Patienten zu behandeln. In diesem Fall sind Sie „Dr. GPT-4“, ein KI-Sprachmodell, das eine Diagnose liefert. Hier sind einige Richtlinien. Erstens gibt es eine einzige definitive Diagnose, und es ist eine Diagnose, von der heute bekannt ist, dass sie beim Menschen existiert. Die Diagnose wird fast immer durch einen Gentest bestätigt. In seltenen Fällen, in denen ein solcher Test für eine Diagnose nicht existiert, kann die Diagnose jedoch anhand validierter klinischer Kriterien gestellt oder in sehr seltenen Fällen einfach durch eine Expertenmeinung bestätigt werden. Nachdem Sie den Fall gelesen haben, möchte ich, dass Sie eine Differentialdiagnose mit einer Liste von Kandidatendiagnosen stellen, die nach Wahrscheinlichkeit geordnet sind, beginnend mit dem wahrscheinlichsten Kandidaten. Jeder Kandidat sollte mit dem Krankheitsnamen angegeben werden. Wenn es sich bei dem ersten Kandidaten beispielsweise um das Branchiookulofaziale Syndrom und bei dem zweiten um Mukoviszidose handelt, geben Sie Folgendes in englischer Sprache an:
+
+1. Branchiooculofacial syndrome
+2. Cystic fibrosis
+
+Diese Liste sollte so viele Diagnosen enthalten, wie Sie für sinnvoll halten.
+
+Sie müssen Ihre Argumentation nicht erläutern, sondern nur die Diagnosen auflisten.
+Ich habe Ihnen diese Anleitung auf English gegeben, aber ich bitte Sie, ihre Antwort ausschließlich auf English zu liefern.
+Hier ist der Fall:
+
+Der Patient war ein 2jähriger Junge, der sich im Alter von 3 Tagen mit den folgenden Symptomen vorgestellt hat: Lymphopenie, Lungenentzündung und Verminderter zirkulierender IgA-Spiegel. Im Alter von 2 Jahren, präsentierte er mit den folgenden Symptomen: Verringertes zirkulierendes Gesamt-IgM.""";
+    @Test
+    public void testCase() {
+        PpktIndividual individual = twoYears();
+        File hpJsonFile = new File("data/hp.json");
+        if (! hpJsonFile.isFile()) {
+            throw new PhenolRuntimeException("Could not find hp.json at " + hpJsonFile.getAbsolutePath());
+        }
+        Ontology hpo = OntologyLoader.loadOntology(hpJsonFile); // NOTE(review): loaded but not used by the rest of this test
+        File translationsFile = new File("data/hp-international.obo");
+        if (! translationsFile.isFile()) {
+            System.err.printf("Could not find translations file at %s. Try download command", translationsFile.getAbsolutePath());
+            return; // NOTE(review): returning here reports the test as passed, not skipped; consider a JUnit assumption
+        }
+        HpInternationalOboParser oboParser = new HpInternationalOboParser(translationsFile);
+        Map<String, HpInternational> internationalMap = oboParser.getLanguageToInternationalMap();
+        PromptGenerator german = PromptGenerator.german(internationalMap.get("de"));
+        String prompt = german.createPrompt(individual);
+        assertEquals(case_vignette, prompt.trim());
+    }
+
+
+
+}
diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGermanTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGermanTest.java
new file mode 100644
index 0000000..25095d5
--- /dev/null
+++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGermanTest.java
@@ -0,0 +1,115 @@
+package org.monarchinitiative.phenopacket2prompt.output.impl.german;
+
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+import org.monarchinitiative.phenol.base.PhenolRuntimeException;
+import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex;
+import
org.monarchinitiative.phenopacket2prompt.model.PpktIndividual;
+import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase;
+import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator;
+
+import java.util.function.Supplier;
+import java.util.stream.Stream;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+public class PpktIndividualGermanTest extends PPKtIndividualBase {
+
+
+
+    private static Stream<TestIndividual> testGetIndividualDescription() { // cases for getIndividualDescription
+        return Stream.of(
+                new TestIndividual("46 year old female, infantile onset",
+                        female46yearsInfantileOnset(), new TestOutcome.Ok("Die Patientin war eine 46-jährige Frau, die sich im Säuglingsalter mit den folgenden Symptomen vorgestellt hat: ")),
+                new TestIndividual("male 4 months, congenital onset",
+                        male4monthsCongenitalOnset(), new TestOutcome.Ok("Der Patient war ein 4 Monate alter Säugling, der sich bei der Geburt mit den folgenden Symptomen vorgestellt hat: ")),
+                new TestIndividual("female, no onset",
+                        femaleNoAge(), new TestOutcome.Ok("Die Patientin stellte sich mit den folgenden Symptomen vor: ")),
+                new TestIndividual("female, no HPOs",
+                        femaleNoHPOs(), new TestOutcome.Error(() -> new PhenolRuntimeException("No HPO annotations"))),
+                new TestIndividual("unknown sex, no 4yo",
+                        unknownSex4YearsOnset(), new TestOutcome.Ok("Der Patient stellte sich in der Kindheit mit den folgenden Symptomen vor: "))
+        );
+    }
+
+
+
+    @ParameterizedTest
+    @MethodSource("testGetIndividualDescription")
+    void testEvaluateExpression(TestIndividual testCase) {
+        PPKtIndividualInfoGenerator generator = new PpktIndividualGerman();
+        PpktIndividual ppkti = testCase.ppktIndividual();
+        switch (testCase.expectedOutcome()) {
+            case TestOutcome.Ok(String expectedResult) ->
+                    assertEquals(expectedResult, generator.getIndividualDescription(ppkti),
+                            "Incorrect evaluation for: " + testCase.description());
+            case TestOutcome.Error(var exceptionSupplier) -> // var: take the component type from the record rather than a raw Supplier
+                    assertThrows(exceptionSupplier.get().getClass(),
+                            () -> generator.getIndividualDescription(ppkti),
+                            "Incorrect error handling for: " + testCase.description());
+        }
+    }
+
+
+
+    private static Stream<TestIdvlHeShe> testGetPPKtSex() { // cases for heSheIndividual
+        return Stream.of(
+                new TestIdvlHeShe("female",
+                        PhenopacketSex.FEMALE, new TestOutcome.Ok("sie")),
+                new TestIdvlHeShe("male",
+                        PhenopacketSex.MALE, new TestOutcome.Ok("er")),
+                new TestIdvlHeShe("proband",
+                        PhenopacketSex.UNKNOWN, new TestOutcome.Ok("die Person"))
+        );
+    }
+
+    @ParameterizedTest
+    @MethodSource("testGetPPKtSex")
+    void testPPKtSex(TestIdvlHeShe testCase) {
+        PPKtIndividualInfoGenerator generator = new PpktIndividualGerman();
+        switch (testCase.expectedOutcome()) {
+            case TestOutcome.Ok(String expectedResult) ->
+                    assertEquals(expectedResult, generator.heSheIndividual(testCase.ppktSex()));
+            case TestOutcome.Error(var exceptionSupplier) ->
+                    assertThrows(exceptionSupplier.get().getClass(),
+                            () -> generator.heSheIndividual(testCase.ppktSex()),
+                            "Incorrect error handling for: " + testCase.description());
+        }
+    }
+
+
+
+
+    private static Stream<TestIdvlAtAge> testIndlAtAge() { // cases for atAge
+        return Stream.of(
+                new TestIdvlAtAge("congenital",
+                        congenital, new TestOutcome.Ok("Zum Zeitpunkt der Geburt")),
+                new TestIdvlAtAge("infantile",
+                        infantile, new TestOutcome.Ok("Als Säugling")),
+                new TestIdvlAtAge("childhood age",
+                        childhood, new TestOutcome.Ok("In der Kindheit")),
+                new TestIdvlAtAge("46 years old",
+                        p46y, new TestOutcome.Ok("Im Alter von 46 Jahren"))
+        );
+    }
+
+
+    @ParameterizedTest
+    @MethodSource("testIndlAtAge")
+    void testAtAge(TestIdvlAtAge testCase) { // exercises atAge, so it is named for that rather than reusing testPPKtSex
+        PPKtIndividualInfoGenerator generator = new PpktIndividualGerman();
+        switch (testCase.expectedOutcome()) {
+            case TestOutcome.Ok(String expectedResult) ->
+                    assertEquals(expectedResult, generator.atAge(testCase.ppktAge()));
+            case TestOutcome.Error(var exceptionSupplier) ->
+                    assertThrows(exceptionSupplier.get().getClass(),
+                            () -> generator.atAge(testCase.ppktAge()),
+                            "Incorrect error handling for: " + testCase.description());
+        }
+    }
+
+
+
+
+}
diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGeneratorTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGeneratorTest.java
new file mode 100644
index 0000000..bcf0cdd
--- /dev/null
+++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/ItalianPromptGeneratorTest.java
@@ -0,0 +1,10 @@
+package org.monarchinitiative.phenopacket2prompt.output.impl.italian;
+
+public class ItalianPromptGeneratorTest { // placeholder: no test methods yet
+
+
+
+
+
+
+}
diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalianTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalianTest.java
new file mode 100644
index 0000000..f501786
--- /dev/null
+++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/italian/PpktIndividualItalianTest.java
@@ -0,0 +1,123 @@
+package org.monarchinitiative.phenopacket2prompt.output.impl.italian;
+
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+import org.monarchinitiative.phenol.base.PhenolRuntimeException;
+import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex;
+import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual;
+import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase;
+import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator;
+
+import java.util.function.Supplier;
+import java.util.stream.Stream;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+public class PpktIndividualItalianTest extends PPKtIndividualBase {
+
+
+
+    private static Stream<TestIndividual> testGetIndividualDescription() { // cases for getIndividualDescription
+        return Stream.of(
+                new TestIndividual("46 year old female, infantile onset",
+                        female46yearsInfantileOnset(), new TestOutcome.Ok("Il soggetto era una donna di 46 anni che si è presentato nel periodo infantile con")),
+                new TestIndividual("male 4 months, congenital onset",
+                        male4monthsCongenitalOnset(), new TestOutcome.Ok("Il soggetto era un infante maschio di 4 mesi che si è presentato alla nascita con")),
+                new TestIndividual("female, no onset",
+                        femaleNoAge(), new TestOutcome.Ok("Il soggetto era una femmina che si è presentata con")),
+                new TestIndividual("female, no HPOs",
+                        femaleNoHPOs(), new TestOutcome.Error(() -> new PhenolRuntimeException("Nessuna anomalia fenotipica"))),
+                new TestIndividual("unknown sex, no 4yo",
+                        unknownSex4YearsOnset(), new TestOutcome.Ok("Il soggetto si è presentato da bambino con"))
+        );
+    }
+
+
+
+    @ParameterizedTest
+    @MethodSource("testGetIndividualDescription")
+    void testEvaluateExpression(TestIndividual testCase) {
+        PPKtIndividualInfoGenerator generator = new PpktIndividualItalian();
+        PpktIndividual ppkti = testCase.ppktIndividual();
+        switch (testCase.expectedOutcome()) {
+            case TestOutcome.Ok(String expectedResult) ->
+                    assertEquals(expectedResult, generator.getIndividualDescription(ppkti),
+                            "Incorrect evaluation for: " + testCase.description());
+            case TestOutcome.Error(var exceptionSupplier) -> // var: take the component type from the record rather than a raw Supplier
+                    assertThrows(exceptionSupplier.get().getClass(),
+                            () -> generator.getIndividualDescription(ppkti),
+                            "Incorrect error handling for: " + testCase.description());
+        }
+    }
+
+
+
+    private static Stream<TestIdvlHeShe> testGetPPKtSex() { // cases for heSheIndividual
+        return Stream.of(
+                new TestIdvlHeShe("female",
+                        PhenopacketSex.FEMALE, new TestOutcome.Ok("lei")),
+                new TestIdvlHeShe("male",
+                        PhenopacketSex.MALE, new TestOutcome.Ok("lui")),
+                new TestIdvlHeShe("proband",
+                        PhenopacketSex.UNKNOWN, new TestOutcome.Ok("il soggetto"))
+        );
+    }
+
+    @ParameterizedTest
+    @MethodSource("testGetPPKtSex")
+    void testPPKtSex(TestIdvlHeShe testCase) {
+        PPKtIndividualInfoGenerator generator = new PpktIndividualItalian();
+        switch (testCase.expectedOutcome()) {
+            case TestOutcome.Ok(String expectedResult) ->
+                    assertEquals(expectedResult, generator.heSheIndividual(testCase.ppktSex()));
+            case TestOutcome.Error(var exceptionSupplier) ->
+                    assertThrows(exceptionSupplier.get().getClass(),
+                            () -> generator.heSheIndividual(testCase.ppktSex()),
+                            "Incorrect error handling for: " + testCase.description());
+        }
+    }
+
+
+
+//public record TestIdvlAtAge(String description, PhenopacketAge ppktAge, TestOutcome expectedOutcome) {}
+
+
+
+
+    private static Stream<TestIdvlAtAge> testIndlAtAge() { // cases for atAge
+        return Stream.of(
+                new TestIdvlAtAge("congenital",
+                        congenital, new TestOutcome.Ok("Alla nascita")),
+                new TestIdvlAtAge("infantile",
+                        infantile, new TestOutcome.Ok("Durante il periodo infantile")),
+                new TestIdvlAtAge("childhood age",
+                        childhood, new TestOutcome.Ok("Durante l'infanzia")),
+                new TestIdvlAtAge("46 years old",
+                        p46y, new TestOutcome.Ok("All'età di 46 anni"))
+        );
+    }
+
+
+    @ParameterizedTest
+    @MethodSource("testIndlAtAge")
+    void testAtAge(TestIdvlAtAge testCase) { // exercises atAge, so it is named for that rather than reusing testPPKtSex
+        PPKtIndividualInfoGenerator generator = new PpktIndividualItalian();
+        switch (testCase.expectedOutcome()) {
+            case TestOutcome.Ok(String expectedResult) ->
+                    assertEquals(expectedResult, generator.atAge(testCase.ppktAge()));
+            case TestOutcome.Error(var exceptionSupplier) ->
+                    assertThrows(exceptionSupplier.get().getClass(),
+                            () -> generator.atAge(testCase.ppktAge()),
+                            "Incorrect error handling for: " + testCase.description());
+        }
+
+
+    }
+
+
+
+
+
+
+}
diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java
new file mode 100644
index 0000000..903b6fc
--- /dev/null
+++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktIndividualSpanishTest.java
@@ -0,0 +1,119 @@
+package
org.monarchinitiative.phenopacket2prompt.output.impl.spanish;
+
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+import org.monarchinitiative.phenol.base.PhenolRuntimeException;
+import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex;
+import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual;
+import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase;
+import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator;
+
+import java.util.function.Supplier;
+import java.util.stream.Stream;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+public class PpktIndividualSpanishTest extends PPKtIndividualBase {
+
+
+
+    private static Stream<TestIndividual> testGetIndividualDescription() { // cases for getIndividualDescription
+        return Stream.of(
+                new TestIndividual("46 year old female, infantile onset",
+                        female46yearsInfantileOnset(), new TestOutcome.Ok("La paciente era una mujer de 46 años que se presentó en el primer año de vida con")),
+                new TestIndividual("male 4 months, congenital onset",
+                        male4monthsCongenitalOnset(), new TestOutcome.Ok("El paciente era un bebé de 4 meses que se presentó al nacer con")),
+                new TestIndividual("female, no onset",
+                        femaleNoAge(), new TestOutcome.Ok("La paciente se presentó con")),
+                new TestIndividual("female, no HPOs",
+                        femaleNoHPOs(), new TestOutcome.Error(() -> new PhenolRuntimeException("No HPO annotations"))),
+                new TestIndividual("unknown sex, no 4yo",
+                        unknownSex4YearsOnset(), new TestOutcome.Ok("El paciente se presentó en la niñez con"))
+        );
+    }
+
+
+
+    @ParameterizedTest
+    @MethodSource("testGetIndividualDescription")
+    void testEvaluateExpression(TestIndividual testCase) {
+        PPKtIndividualInfoGenerator generator = new PpktIndividualSpanish();
+        PpktIndividual ppkti = testCase.ppktIndividual();
+        switch (testCase.expectedOutcome()) {
+            case TestOutcome.Ok(String expectedResult) ->
+                    assertEquals(expectedResult, generator.getIndividualDescription(ppkti),
+                            "Incorrect evaluation for: " + testCase.description());
+            case TestOutcome.Error(var exceptionSupplier) -> // var: take the component type from the record rather than a raw Supplier
+                    assertThrows(exceptionSupplier.get().getClass(),
+                            () -> generator.getIndividualDescription(ppkti),
+                            "Incorrect error handling for: " + testCase.description());
+        }
+    }
+
+
+
+    private static Stream<TestIdvlHeShe> testGetPPKtSex() { // cases for heSheIndividual
+        return Stream.of(
+                new TestIdvlHeShe("female",
+                        PhenopacketSex.FEMALE, new TestOutcome.Ok("ella")),
+                new TestIdvlHeShe("male",
+                        PhenopacketSex.MALE, new TestOutcome.Ok("el")),
+                new TestIdvlHeShe("proband",
+                        PhenopacketSex.UNKNOWN, new TestOutcome.Ok("el individuo"))
+        );
+    }
+
+    @ParameterizedTest
+    @MethodSource("testGetPPKtSex")
+    void testPPKtSex(TestIdvlHeShe testCase) {
+        PPKtIndividualInfoGenerator generator = new PpktIndividualSpanish();
+        switch (testCase.expectedOutcome()) {
+            case TestOutcome.Ok(String expectedResult) ->
+                    assertEquals(expectedResult, generator.heSheIndividual(testCase.ppktSex()));
+            case TestOutcome.Error(var exceptionSupplier) ->
+                    assertThrows(exceptionSupplier.get().getClass(),
+                            () -> generator.heSheIndividual(testCase.ppktSex()),
+                            "Incorrect error handling for: " + testCase.description());
+        }
+    }
+
+
+
+
+    private static Stream<TestIdvlAtAge> testIndlAtAge() { // cases for atAge
+        return Stream.of(
+                new TestIdvlAtAge("congenital",
+                        congenital, new TestOutcome.Ok("Al nacer")),
+                new TestIdvlAtAge("infantile",
+                        infantile, new TestOutcome.Ok("Durante el período infantil")),
+                new TestIdvlAtAge("childhood age",
+                        childhood, new TestOutcome.Ok("Durante la infancia")),
+                new TestIdvlAtAge("46 years old",
+                        p46y, new TestOutcome.Ok("A la edad de 46 años"))
+        );
+    }
+
+
+    @ParameterizedTest
+    @MethodSource("testIndlAtAge")
+    void testAtAge(TestIdvlAtAge testCase) { // exercises atAge, so it is named for that rather than reusing testPPKtSex
+        PPKtIndividualInfoGenerator generator = new PpktIndividualSpanish();
+        switch (testCase.expectedOutcome()) {
+            case TestOutcome.Ok(String expectedResult) ->
+                    assertEquals(expectedResult, generator.atAge(testCase.ppktAge()));
+            case TestOutcome.Error(var exceptionSupplier) ->
+                    assertThrows(exceptionSupplier.get().getClass(),
+                            () -> generator.atAge(testCase.ppktAge()),
+                            "Incorrect error handling for: " + testCase.description());
+        }
+
+
+    }
+
+
+
+
+
+
+}
diff --git a/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptTest.java b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptTest.java
new file mode 100644
index 0000000..0362553
--- /dev/null
+++ b/src/test/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/SpanishPromptTest.java
@@ -0,0 +1,79 @@
+package org.monarchinitiative.phenopacket2prompt.output.impl.spanish;
+
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+import org.monarchinitiative.phenol.base.PhenolRuntimeException;
+import org.monarchinitiative.phenol.io.OntologyLoader;
+import org.monarchinitiative.phenol.ontology.data.Ontology;
+import org.monarchinitiative.phenopacket2prompt.international.HpInternational;
+import org.monarchinitiative.phenopacket2prompt.international.HpInternationalOboParser;
+import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator;
+
+import java.io.File;
+import java.util.Map;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualBase.*;
+
+/**
+ * Test only works with local copies of data/hp.json and data/hp-international.obo
+ */
+@Disabled
+public class SpanishPromptTest {
+
+    private static final String case_vignette = """
+Estoy realizando un experimento con el informe de un caso clínico para comparar sus diagnósticos con los de expertos humanos. Les voy a dar parte de un caso médico. No estás intentando tratar a ningún paciente. En este caso, usted es el “Dr. GPT-4”, un modelo de lenguaje de IA que proporciona un diagnóstico. Aquí hay algunas pautas. En primer lugar, existe un único diagnóstico definitivo, y es un diagnóstico que hoy se sabe que existe en humanos. El diagnóstico casi siempre se confirma mediante algún tipo de prueba genética, aunque en casos raros cuando no existe dicha prueba para un diagnóstico, el diagnóstico puede realizarse utilizando criterios clínicos validados o, muy raramente, simplemente confirmado por la opinión de un experto. Después de leer el caso, quiero que haga un diagnóstico diferencial con una lista de diagnósticos candidatos clasificados por probabilidad comenzando con el candidato más probable. Cada candidato debe especificarse con el nombre de la enfermedad. Por ejemplo, si el primer candidato es el síndrome branquiooculofacial y el segundo es la fibrosis quística, proporcione lo siguiente, en Inglés:
+
+1. Branchiooculofacial syndrome
+2. Cystic fibrosis
+
+Esta lista debe proporcionar tantos diagnósticos como considere razonables.
+
+No es necesario que explique su razonamiento, simplemente enumere los diagnósticos.
+Te estoy dando estas instrucciones en Español pero quiero que proveas todas tus respuestas en Inglés.
+Este es el caso:
+
+El paciente era un niño de 2 años que se presentó a la edad de 3 dias con Linfopenia, Neumonía y Déficit de IgA. A la edad de 2 años, el presentó Déficit de IgM.""";
+
+    @Test
+    public void test() {
+        var ppktI = PMID_9312167_A(); // NOTE(review): unused; the prompt below is built from twoYears()
+        File hpJsonFile = new File("data/hp.json");
+        if (! hpJsonFile.isFile()) {
+            throw new PhenolRuntimeException("Could not find hp.json at " + hpJsonFile.getAbsolutePath());
+        }
+        Ontology hpo = OntologyLoader.loadOntology(hpJsonFile); // NOTE(review): loaded but not used by the rest of this test
+        File translationsFile = new File("data/hp-international.obo");
+        if (! translationsFile.isFile()) {
+            System.err.printf("Could not find translations file at %s. Try download command", translationsFile.getAbsolutePath());
+            return; // NOTE(review): returning here reports the test as passed, not skipped; consider a JUnit assumption
+        }
+        HpInternationalOboParser oboParser = new HpInternationalOboParser(translationsFile);
+        Map<String, HpInternational> internationalMap = oboParser.getLanguageToInternationalMap();
+        PromptGenerator spanish = PromptGenerator.spanish(internationalMap.get("es"));
+        String prompt = spanish.createPrompt(twoYears());
+        assertEquals(case_vignette, prompt.trim());
+    }
+
+
+    @Test
+    public void testNoObservedAtOnset() {
+        File hpJsonFile = new File("data/hp.json");
+        if (! hpJsonFile.isFile()) {
+            throw new PhenolRuntimeException("Could not find hp.json at " + hpJsonFile.getAbsolutePath());
+        }
+        Ontology hpo = OntologyLoader.loadOntology(hpJsonFile); // NOTE(review): loaded but not used by the rest of this test
+        File translationsFile = new File("data/hp-international.obo");
+        if (! translationsFile.isFile()) {
+            System.err.printf("Could not find translations file at %s. Try download command", translationsFile.getAbsolutePath());
+            return; // NOTE(review): returning here reports the test as passed, not skipped; consider a JUnit assumption
+        }
+        HpInternationalOboParser oboParser = new HpInternationalOboParser(translationsFile);
+        Map<String, HpInternational> internationalMap = oboParser.getLanguageToInternationalMap();
+        PromptGenerator spanish = PromptGenerator.spanish(internationalMap.get("es"));
+        String prompt = spanish.createPrompt(onlyExcludedAtPresentation());
+        assertEquals(case_vignette, prompt.trim()); // NOTE(review): same expected text as test() although the input is onlyExcludedAtPresentation(); confirm the expectation
+    }
+
+
+}