From bd8fc304679ef0205e933d0ab1ead1a3855fa042 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Fri, 7 Jun 2024 17:48:57 +0200 Subject: [PATCH] copy phenopackets to output --- .../cmd/GbtTranslateBatchCommand.java | 14 ++++- .../phenopacket2prompt/output/PpktCopy.java | 53 +++++++++++++++++++ .../impl/dutch/DutchPromptGenerator.java | 5 -- .../impl/german/PpktIndividualGerman.java | 6 ++- .../spanish/PpktPhenotypicfeatureSpanish.java | 4 +- 5 files changed, 71 insertions(+), 11 deletions(-) create mode 100644 src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktCopy.java diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java index 20c5315..67ad96f 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/cmd/GbtTranslateBatchCommand.java @@ -9,6 +9,7 @@ import org.monarchinitiative.phenopacket2prompt.model.PhenopacketDisease; import org.monarchinitiative.phenopacket2prompt.model.PpktIndividual; import org.monarchinitiative.phenopacket2prompt.output.CorrectResult; +import org.monarchinitiative.phenopacket2prompt.output.PpktCopy; import org.monarchinitiative.phenopacket2prompt.output.PromptGenerator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,6 +42,10 @@ public class GbtTranslateBatchCommand implements Callable { description = "path to translations file") private String translationsPath = "data/hp-international.obo"; + @CommandLine.Option(names = {"-o", "--outdir"}, + description = "path to outdir") + private String outdirname = "prompts"; + @CommandLine.Option(names = {"-d", "--dir"}, description = "Path to directory with JSON phenopacket files", required = true) private String ppktDir; @@ -61,10 +66,11 @@ public Integer call() throws Exception { return 1; } HpInternationalOboParser oboParser = new HpInternationalOboParser(translationsFile); + Map internationalMap = oboParser.getLanguageToInternationalMap(); LOGGER.info("Got {} translations", internationalMap.size()); List ppktFiles = getAllPhenopacketJsonFiles(); - createDir("prompts"); + createDir(outdirname); List correctResultList = outputPromptsEnglish(ppktFiles, hpo); // output all non-English languages here @@ -86,7 +92,11 @@ public Integer call() throws Exception { PromptGenerator italian = PromptGenerator.italian(internationalMap.get("it")); outputPromptsInternational(ppktFiles, hpo, "it", italian); resetOutput("finished"); - + // output original phenopackets + PpktCopy pcopy = new PpktCopy(new File(outdirname)); + for (var file : ppktFiles) { + pcopy.copyFile(file); + } // output file with correct diagnosis list outputCorrectResults(correctResultList); diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktCopy.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktCopy.java new file mode 100644 index 0000000..996b4d0 --- /dev/null +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/PpktCopy.java @@ -0,0 +1,53 @@ +package org.monarchinitiative.phenopacket2prompt.output; + + +import org.monarchinitiative.phenol.base.PhenolRuntimeException; + +import java.io.*; + +/** + * Class to copy phenopackets from the input directory to an output directory so that we have all of the files + * used for an experiment in one place. + */ +public class PpktCopy { + + private final File ppkt_out_dir; + + + public PpktCopy(File outdirectory) { + ppkt_out_dir = new File(outdirectory + File.separator + "original_phenopackets"); + createDir(ppkt_out_dir); + } + + + + private void createDir(File path) { + if (! path.exists() ) { + boolean result = path.mkdir(); + if (! result) { + throw new PhenolRuntimeException("Could not create output directory at " + path); + } + } + } + + public void copyFile(File sourceLocation) { + try { + String fname = sourceLocation.getName(); + File outfile = new File(ppkt_out_dir + File.separator + fname); + + InputStream in = new FileInputStream(sourceLocation); + OutputStream out = new FileOutputStream(outfile); + + // Copy the bits from instream to outstream + byte[] buf = new byte[1024]; + int len; + while ((len = in.read(buf)) > 0) { + out.write(buf, 0, len); + } + in.close(); + out.close(); + } catch (IOException e) { + throw new PhenolRuntimeException(e.getMessage()); + } + } +} diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java index 6e89600..0fe24d4 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/dutch/DutchPromptGenerator.java @@ -1,6 +1,5 @@ package org.monarchinitiative.phenopacket2prompt.output.impl.dutch; -import org.monarchinitiative.phenol.ontology.data.Ontology; import org.monarchinitiative.phenopacket2prompt.model.OntologyTerm; import org.monarchinitiative.phenopacket2prompt.model.PhenopacketAge; import org.monarchinitiative.phenopacket2prompt.model.PhenopacketSex; @@ -15,9 +14,6 @@ public class DutchPromptGenerator implements PromptGenerator { - private final Ontology hpo; - - private final PPKtIndividualInfoGenerator ppktAgeSexGenerator; private final PhenopacketTextGenerator ppktTextGenerator; @@ -27,7 +23,6 @@ public class DutchPromptGenerator implements PromptGenerator { public DutchPromptGenerator(PpktPhenotypicFeatureGenerator pfgen) { - this.hpo = hpo; ppktAgeSexGenerator = new PpktIndividualDutch(); ppktTextGenerator = new PpktTextDutch(); this.ppktPhenotypicFeatureGenerator = pfgen; diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java index ccbdc7d..8b134ce 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/german/PpktIndividualGerman.java @@ -1,6 +1,5 @@ package org.monarchinitiative.phenopacket2prompt.output.impl.german; -import com.sun.source.tree.BreakTree; import org.monarchinitiative.phenol.base.PhenolRuntimeException; import org.monarchinitiative.phenopacket2prompt.model.*; import org.monarchinitiative.phenopacket2prompt.output.PPKtIndividualInfoGenerator; @@ -449,7 +448,10 @@ public String atAge(PhenopacketAge ppktAge) { case "Neonatal onset" -> "In der neugeborenen Zeit"; case "Congenital onset" -> "Zum Zeitpunkt der Geburt"; case "Adult onset" -> "Im Erwachsenenalter"; - default-> String.format("TODO TODO el %s período", label.replace(" onset", "")); + case "Juvenile onset" -> "Im Jugendlichenalter"; + default-> { + throw new PhenolRuntimeException("No German translation for " + label); + } }; } else { return ""; // should never get here diff --git a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java index 7876582..629ccb0 100644 --- a/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java +++ b/src/main/java/org/monarchinitiative/phenopacket2prompt/output/impl/spanish/PpktPhenotypicfeatureSpanish.java @@ -95,9 +95,9 @@ public String formatFeatures(List ontologyTerms) { } else { String exclusion; if (excludedLabels.size() == 1) { - exclusion = String.format(" En cambio, se descartó %s.", getOxfordCommaList(excludedLabels)); + exclusion = String.format(". En cambio, se descartó %s.", getOxfordCommaList(excludedLabels)); } else { - exclusion = String.format(" En cambio, se descartaron %s.", getOxfordCommaList(excludedLabels)); + exclusion = String.format(". En cambio, se descartaron %s.", getOxfordCommaList(excludedLabels)); } return getOxfordCommaList(observedLabels) + exclusion; }