Skip to content

Commit

Permalink
Merge branch 'TASK-5564' into TASK-5387
Browse files Browse the repository at this point in the history
  • Loading branch information
jtarraga committed May 7, 2024
2 parents 4ea3ffe + babb593 commit 7972070
Show file tree
Hide file tree
Showing 7 changed files with 261 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
package org.opencb.biodata.formats.feature.mirbase;

import org.opencb.biodata.models.core.MiRnaGene;
import org.opencb.biodata.models.core.MiRnaMature;
import org.opencb.commons.utils.FileUtils;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;

/**
 * Line-oriented parser for miRBase {@code .dat} files.
 *
 * <p>Each record is scanned for its {@code ID}, {@code AC}, {@code DE}, {@code FT} and {@code SQ}
 * lines. A {@link MiRnaGene} is built only for records whose {@code DE} line contains the requested
 * species text, and is handed to the callback once its sequence has been read.
 */
public class MirBaseParser {

    private static final String ID_LABEL = "ID";
    private static final String AC_LABEL = "AC";
    private static final String DE_LABEL = "DE";
    private static final String FT_LABEL = "FT";
    private static final String SQ_LABEL = "SQ";
    private static final String END_OF_ITEM_LABEL = "XX";
    private static final String END_OF_RECORD_LABEL = "//";

    private static final String MIRNA_LABEL = "miRNA";

    private MirBaseParser() {
        throw new IllegalStateException("Utility class");
    }

    /**
     * Parses a miRBase data file and invokes the callback once per gene of the given species.
     *
     * <p>The value returned by {@link MirBaseParserCallback#processMiRnaGene(MiRnaGene)} is ignored.
     *
     * @param miRnaDatFile path of the file to read; it is opened through {@code FileUtils.newInputStream}
     * @param species      text that must be contained in a record's {@code DE} line for the record to be reported
     * @param callback     receiver of every parsed gene
     * @throws IOException if the file cannot be read, or if it ends in the middle of a feature or
     *                     sequence section
     */
    public static void parse(Path miRnaDatFile, String species, MirBaseParserCallback callback) throws IOException {
        // Explicit charset: before Java 18 the single-argument InputStreamReader uses the platform default.
        try (BufferedReader datReader = new BufferedReader(
                new InputStreamReader(FileUtils.newInputStream(miRnaDatFile), StandardCharsets.UTF_8))) {
            String miRBaseAccession = null;
            String miRBaseID = null;
            MiRnaGene miRnaGene = null;
            String line;
            while ((line = datReader.readLine()) != null) {
                String[] split = line.split("\\s+");
                switch (split[0]) {
                    case ID_LABEL: {
                        // A label-only line carries no value; do not index past the array
                        miRBaseID = split.length > 1 ? split[1] : null;
                        break;
                    }
                    case AC_LABEL: {
                        miRBaseAccession = split.length > 1 ? firstAccession(split[1]) : null;
                        break;
                    }
                    case DE_LABEL: {
                        if (line.contains(species)) {
                            miRnaGene = new MiRnaGene();
                            miRnaGene.setId(miRBaseID)
                                    .setAccession(miRBaseAccession);
                        }
                        break;
                    }
                    case FT_LABEL: {
                        if (miRnaGene != null && split.length > 1 && MIRNA_LABEL.equalsIgnoreCase(split[1])) {
                            processMiRnaMature(line, miRnaGene, datReader);
                        }
                        break;
                    }
                    case SQ_LABEL: {
                        if (miRnaGene != null) {
                            // Consumes every line up to and including END_OF_RECORD_LABEL
                            miRnaGene.setSequence(readSequence(datReader));

                            // Update mature sequences (coordinates are 1-based, end inclusive)
                            for (MiRnaMature mature : miRnaGene.getMatures()) {
                                if (mature.getStart() > 0 && mature.getEnd() > 0) {
                                    mature.setSequence(
                                            miRnaGene.getSequence().substring(mature.getStart() - 1, mature.getEnd()));
                                }
                            }

                            // Callback
                            callback.processMiRnaGene(miRnaGene);

                            // The record terminator was consumed above, so reset the per-record state here
                            miRnaGene = null;
                            miRBaseID = null;
                            miRBaseAccession = null;
                        }
                        break;
                    }
                    case END_OF_RECORD_LABEL: {
                        // Reached when the SQ section was skipped or missing: make sure that nothing
                        // from this record leaks into the next one
                        miRnaGene = null;
                        miRBaseID = null;
                        miRBaseAccession = null;
                        break;
                    }
                    default: {
                        // Do nothing
                        break;
                    }
                }
            }
        }
    }

    /** Returns the text before the first {@code ';'} of an AC value, or the whole value if there is none. */
    private static String firstAccession(String value) {
        int semicolon = value.indexOf(';');
        return semicolon >= 0 ? value.substring(0, semicolon) : value;
    }

    /** Concatenates the sequence chunks of every line up to the end-of-record marker, which is consumed. */
    private static String readSequence(BufferedReader datReader) throws IOException {
        StringBuilder seq = new StringBuilder();
        String line;
        while ((line = datReader.readLine()) != null) {
            if (line.equals(END_OF_RECORD_LABEL)) {
                return seq.toString();
            }
            String[] split = line.split("\\s+");
            // Index 0 is the text before the first whitespace run and the last token is skipped as well;
            // only the tokens in between are appended
            for (int i = 1; i < split.length - 1; i++) {
                seq.append(split[i]);
            }
        }
        throw new IOException("Unexpected end of file while reading a sequence: missing '"
                + END_OF_RECORD_LABEL + "' line");
    }

    /**
     * Reads the consecutive {@code FT miRNA} features that start at {@code headerLine} and appends one
     * {@link MiRnaMature} per feature to the gene, in file order. Reading stops at the end-of-item
     * marker, which is consumed.
     *
     * @param headerLine first feature line, e.g: FT miRNA 6..27
     * @param miRnaGene  gene that receives the matures
     * @param datReader  reader positioned right after {@code headerLine}
     * @throws IOException if reading fails or the file ends before the end-of-item marker
     */
    private static void processMiRnaMature(String headerLine, MiRnaGene miRnaGene, BufferedReader datReader)
            throws IOException {
        MiRnaMature miRnaMature = newMiRnaMature(headerLine);

        String line;
        while ((line = datReader.readLine()) != null) {
            if (line.equals(END_OF_ITEM_LABEL)) {
                miRnaGene.getMatures().add(miRnaMature);
                return;
            }
            String[] split = line.split("\\s+");
            if (split.length > 1 && split[0].equalsIgnoreCase(FT_LABEL) && split[1].equalsIgnoreCase(MIRNA_LABEL)) {
                // A new feature starts: store the finished one and continue with the next (no recursion)
                miRnaGene.getMatures().add(miRnaMature);
                miRnaMature = newMiRnaMature(line);
            } else if (line.contains("accession=")) {
                miRnaMature.setAccession(line.split("accession=")[1].replace("\"", ""));
            } else if (line.contains("product=")) {
                miRnaMature.setId(line.split("product=")[1].replace("\"", ""));
            }
        }
        throw new IOException("Unexpected end of file while reading miRNA features: missing '"
                + END_OF_ITEM_LABEL + "' line");
    }

    /** Creates a mature whose start and end come from a feature header line, e.g: FT miRNA 6..27 */
    private static MiRnaMature newMiRnaMature(String headerLine) {
        MiRnaMature miRnaMature = new MiRnaMature();
        String[] split = headerLine.split("\\s+");
        String[] pos = split[2].split("\\.\\.");
        miRnaMature.setStart(Integer.parseInt(pos[0]));
        miRnaMature.setEnd(Integer.parseInt(pos[1]));
        return miRnaMature;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package org.opencb.biodata.formats.feature.mirbase;

import org.opencb.biodata.models.core.MiRnaGene;

/**
 * Receiver of the genes produced by {@code MirBaseParser.parse}.
 */
public interface MirBaseParserCallback {
/**
 * Handles one parsed gene. The parser calls this after the gene's sequence and the sequences
 * of its matures have been set.
 *
 * @param miRnaGene the parsed gene
 * @return a boolean result; NOTE(review): {@code MirBaseParser.parse} currently ignores it, so
 *         its intended meaning should be confirmed with the author
 */
boolean processMiRnaGene(MiRnaGene miRnaGene);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package org.opencb.biodata.formats.feature.mirbase;

import org.junit.Assert;
import org.junit.Test;
import org.opencb.biodata.models.core.MiRnaGene;
import org.opencb.biodata.models.core.MiRnaMature;

import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

/**
 * Tests {@link MirBaseParser} against the bundled {@code miRNA.small.dat.gz} resource.
 */
public class MirBaseParserTest {

    private static final String DAT_RESOURCE = "/miRNA.small.dat.gz";

    /**
     * Callback that prints every gene and keeps it for later inspection.
     * Declared static because it needs no state from the enclosing test instance.
     */
    public static class MyCallback implements MirBaseParserCallback {
        private final String msg;
        private final List<MiRnaGene> miRnaGenes;

        public MyCallback(String msg) {
            this.msg = msg;
            this.miRnaGenes = new ArrayList<>();
        }

        @Override
        public boolean processMiRnaGene(MiRnaGene miRnaGene) {
            System.out.println(msg);
            System.out.println(miRnaGene.toString());
            miRnaGenes.add(miRnaGene);
            return true;
        }

        public List<MiRnaGene> getMiRnaGenes() {
            return miRnaGenes;
        }

        /** Returns the first collected gene with the given accession, or {@code null} if there is none. */
        public MiRnaGene getMiRnaGene(String accession) {
            for (MiRnaGene miRnaGene : miRnaGenes) {
                if (accession.equals(miRnaGene.getAccession())) {
                    return miRnaGene;
                }
            }
            return null;
        }

        public int getCounter() {
            return miRnaGenes.size();
        }
    }

    @Test
    public void testMirBaseParser() throws IOException, java.net.URISyntaxException {
        Assert.assertNotNull("Missing test resource " + DAT_RESOURCE, getClass().getResource(DAT_RESOURCE));
        // toURI() decodes escaped characters (e.g. spaces) that URL.getPath() would leave encoded
        Path datFile = Paths.get(getClass().getResource(DAT_RESOURCE).toURI());

        MyCallback callback = new MyCallback(">>> Testing message");

        MirBaseParser.parse(datFile, "Homo sapiens", callback);
        Assert.assertEquals(50, callback.getCounter());

        MiRnaGene mi0000060 = assertGene(callback, "MI0000060", "hsa-let-7a-1",
                "ugggaUGAGGUAGUAGGUUGUAUAGUUuuagggucacacccaccacugggagauaaCUAUACAAUCUACUGUCUUUCcua");
        int found = 0;
        for (MiRnaMature mature : mi0000060.getMatures()) {
            if ("MIMAT0000062".equals(mature.getAccession())) {
                found++;
                assertMature(mature, "hsa-let-7a-5p", "UGAGGUAGUAGGUUGUAUAGUU", 6, 27);
            } else if ("MIMAT0004481".equals(mature.getAccession())) {
                found++;
                assertMature(mature, "hsa-let-7a-3p", "CUAUACAAUCUACUGUCUUUC", 57, 77);
            }
        }
        Assert.assertEquals(2, found);

        MiRnaGene mi0000077 = assertGene(callback, "MI0000077", "hsa-mir-21",
                "ugucgggUAGCUUAUCAGACUGAUGUUGAcuguugaaucucauggCAACACCAGUCGAUGGGCUGUcugaca");
        found = 0;
        for (MiRnaMature mature : mi0000077.getMatures()) {
            if ("MIMAT0000076".equals(mature.getAccession())) {
                found++;
                assertMature(mature, "hsa-miR-21-5p", "UAGCUUAUCAGACUGAUGUUGA", 8, 29);
            } else if ("MIMAT0004494".equals(mature.getAccession())) {
                found++;
                assertMature(mature, "hsa-miR-21-3p", "CAACACCAGUCGAUGGGCUGU", 46, 66);
            }
        }
        Assert.assertEquals(2, found);
    }

    /** Looks up a collected gene by accession and checks its id and (case-insensitive) sequence. */
    private static MiRnaGene assertGene(MyCallback callback, String accession, String id, String sequence) {
        MiRnaGene gene = callback.getMiRnaGene(accession);
        // Fail with a readable message instead of a NullPointerException further down
        Assert.assertNotNull("Gene " + accession + " was not parsed", gene);
        Assert.assertEquals(id, gene.getId());
        Assert.assertEquals(sequence.toUpperCase(), gene.getSequence().toUpperCase());
        return gene;
    }

    /** Checks the id, (case-insensitive) sequence and coordinates of a mature. */
    private static void assertMature(MiRnaMature mature, String id, String sequence, int start, int end) {
        Assert.assertEquals(id, mature.getId());
        Assert.assertEquals(sequence.toUpperCase(), mature.getSequence().toUpperCase());
        Assert.assertEquals(start, mature.getStart());
        Assert.assertEquals(end, mature.getEnd());
    }
}
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public class MiRnaGene {
private List<MiRnaMature> matures;

/**
 * Creates a gene whose {@code matures} list is initialised to a new, empty {@code ArrayList}.
 */
public MiRnaGene() {
    this.matures = new ArrayList<>();
}

public MiRnaGene(String accession, String id, String status, String sequence, List<MiRnaMature> matures) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ public class Literature {
private String _sameAs;
private List<CrossReference> crossReferences;
private String objCls;
private String pubDate;
private List<Term> terms;
private String type;

public float getId() {
return id;
Expand Down Expand Up @@ -55,6 +57,15 @@ public Literature setObjCls(String objCls) {
return this;
}

/**
 * @return the current value of the {@code pubDate} field
 */
public String getPubDate() {
    return this.pubDate;
}

/**
 * Stores the given value in the {@code pubDate} field.
 *
 * @param pubDate value to store
 * @return this instance, so that setter calls can be chained
 */
public Literature setPubDate(String pubDate) {
this.pubDate = pubDate;
return this;
}

/**
 * @return the list held in the {@code terms} field (the same reference, not a copy)
 */
public List<Term> getTerms() {
    return this.terms;
}
Expand All @@ -63,6 +74,15 @@ public Literature setTerms(List<Term> terms) {
this.terms = terms;
return this;
}

/**
 * @return the current value of the {@code type} field
 */
public String getType() {
    return this.type;
}

/**
 * Stores the given value in the {@code type} field.
 *
 * @param type value to store
 * @return this instance, so that setter calls can be chained
 */
public Literature setType(String type) {
this.type = type;
return this;
}
}


Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ public class PharmaDosingGuideline {
private boolean hasTestingInfo;
private List<History> history;
private List<Literature> literature;
private boolean otherPrescribingGuidance;
private boolean pediatric;
private PediatricMarkdown pediatricMarkdown;
private boolean recommendation;
Expand Down Expand Up @@ -136,6 +137,15 @@ public PharmaDosingGuideline setLiterature(List<Literature> literature) {
return this;
}

/**
 * @return the current value of the {@code otherPrescribingGuidance} flag
 */
public boolean isOtherPrescribingGuidance() {
    return this.otherPrescribingGuidance;
}

/**
 * Stores the given value in the {@code otherPrescribingGuidance} flag.
 *
 * @param otherPrescribingGuidance value to store
 * @return this instance, so that setter calls can be chained
 */
public PharmaDosingGuideline setOtherPrescribingGuidance(boolean otherPrescribingGuidance) {
this.otherPrescribingGuidance = otherPrescribingGuidance;
return this;
}

/**
 * @return the current value of the {@code pediatric} flag
 */
public boolean isPediatric() {
    return this.pediatric;
}
Expand Down

0 comments on commit 7972070

Please sign in to comment.