Skip to content

Commit

Permalink
Allow for note to be added (third argument) upon input
Browse files Browse the repository at this point in the history
  • Loading branch information
mpresteg committed Jul 1, 2017
1 parent 72d2491 commit 9429c1c
Show file tree
Hide file tree
Showing 9 changed files with 96 additions and 70 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@
import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Handler;
import java.util.logging.LogManager;
import java.util.logging.Logger;
Expand Down Expand Up @@ -89,7 +87,7 @@ private static void analyzeGLStringFiles(String[] filenames) throws IOException
}

public static List<Sample> analyzeGLStringFile(String name, BufferedReader reader) throws IOException {
LinkedHashMap<String, String> glStrings = GLStringUtilities.readGLStringFile(name, reader);
List<LinkageDisequilibriumGenotypeList> glStrings = GLStringUtilities.readGLStringFile(name, reader);

List<Sample> samplesList = detectLinkages(glStrings);

Expand All @@ -100,7 +98,7 @@ public static List<Sample> analyzeGLStringFile(String name, BufferedReader reade
* @param filename
*/
public static void analyzeGLStringFile(String filename) throws IOException {
LinkedHashMap<String, String> glStrings = GLStringUtilities.readGLStringFile(filename);
List<LinkageDisequilibriumGenotypeList> glStrings = GLStringUtilities.readGLStringFile(filename);
List<Sample> samplesList = null;

samplesList = detectLinkages(glStrings);
Expand All @@ -111,7 +109,6 @@ public static void analyzeGLStringFile(String filename) throws IOException {
HaplotypePairWriter.getInstance().reportDetectedLinkages(findings);
CommonWellDocumentedWriter.getInstance().reportCommonWellDocumented(findings);
DetectedFindingsWriter.getInstance().reportDetectedFindings(findings);
//SummaryWriter.getInstance().reportDetectedLinkages(findings);
}

SamplesList allSamples = new SamplesList();
Expand All @@ -125,24 +122,11 @@ public static void analyzeGLStringFile(String filename) throws IOException {
* @throws IOException
* @throws SecurityException
*/
private static List<Sample> detectLinkages(Map<String, String> glStrings) {
LinkageDisequilibriumGenotypeList linkedGLString;
String glString;
private static List<Sample> detectLinkages(List<LinkageDisequilibriumGenotypeList> glStrings) {
List<Sample> samplesList = new ArrayList<Sample>();

int idx = 1;
for (String key : glStrings.keySet()) {
glString = glStrings.get(key);
String submittedGlString = glString;

if (!GLStringUtilities.validateGLStringFormat(glString)) {
glString = GLStringUtilities.fullyQualifyGLString(glString);
}

MultilocusUnphasedGenotype mug = GLStringUtilities.convertToMug(glString);
linkedGLString = new LinkageDisequilibriumGenotypeList(key, mug);

linkedGLString.setSubmittedGlString(submittedGlString);
for (LinkageDisequilibriumGenotypeList linkedGLString : glStrings) {

List<Haplotype> knownHaplotypes = GLStringUtilities.buildHaplotypes(linkedGLString);

Expand Down
5 changes: 5 additions & 0 deletions ld-validation/src/main/java/org/dash/valid/Sample.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ public String getId() {
return getGenotypeList().getId();
}

/**
 * Returns the note held by this sample's genotype list.
 * Delegates to {@code getGenotypeList().getNote()}; the value is mapped to
 * the {@code note} XML attribute.
 *
 * @return the note reported by the underlying genotype list
 */
@XmlAttribute(name="note")
public String getNote() {
return getGenotypeList().getNote();
}

@XmlElement(name="processed-gl-string")
public String getProcessedGlString() {
return getGenotypeList().getGLString().equals(getGenotypeList().getSubmittedGlString()) ? null : getGenotypeList().getGLString();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Set;
import java.util.StringTokenizer;
Expand Down Expand Up @@ -416,11 +415,11 @@ public static String fillLocus(Locus locus, String segment) {
return segment;
}

public static LinkedHashMap<String, String> readGLStringFile(String name, BufferedReader reader) {
LinkedHashMap<String, String> glStrings = null;
public static List<LinkageDisequilibriumGenotypeList> readGLStringFile(String name, BufferedReader reader) {
List<LinkageDisequilibriumGenotypeList> linkedGLStrings = null;

try {
glStrings = parseGLStringFile(name, reader);
linkedGLStrings = parseGLStringFile(name, reader);
} catch (IOException e) {
LOGGER.severe("Problem reading GL String file: " + name);
e.printStackTrace();
Expand All @@ -430,12 +429,12 @@ public static LinkedHashMap<String, String> readGLStringFile(String name, Buffer
e.printStackTrace();
}

return glStrings;
return linkedGLStrings;
}

public static LinkedHashMap<String, String> readGLStringFile(String filename) {
public static List<LinkageDisequilibriumGenotypeList> readGLStringFile(String filename) {
BufferedReader reader = null;
LinkedHashMap<String, String> glStrings = null;
List<LinkageDisequilibriumGenotypeList> linkedGLStrings = null;

try {
InputStream stream = GLStringUtilities.class.getClassLoader()
Expand All @@ -446,7 +445,7 @@ public static LinkedHashMap<String, String> readGLStringFile(String filename) {

reader = new BufferedReader(new InputStreamReader(stream));

glStrings = parseGLStringFile(filename, reader);
linkedGLStrings = parseGLStringFile(filename, reader);

} catch (FileNotFoundException e) {
LOGGER.severe("Couldn't find GL String file: " + filename);
Expand All @@ -465,14 +464,15 @@ public static LinkedHashMap<String, String> readGLStringFile(String filename) {
e.printStackTrace();
}
}

return glStrings;
return linkedGLStrings;
}

private static LinkedHashMap<String, String> parseGLStringFile(String filename,
private static List<LinkageDisequilibriumGenotypeList> parseGLStringFile(String filename,
BufferedReader reader)
throws IOException, ParserConfigurationException, SAXException {
LinkedHashMap<String, String> glStrings = new LinkedHashMap<String, String>();
List<LinkageDisequilibriumGenotypeList> linkedGLStrings = new ArrayList<LinkageDisequilibriumGenotypeList>();


if (filename.endsWith(GLStringConstants.XML) || filename.endsWith(GLStringConstants.HML)) {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
Expand All @@ -492,29 +492,63 @@ private static LinkedHashMap<String, String> parseGLStringFile(String filename,
if (j > 0) glString.append(GLStringConstants.GENE_DELIMITER);
glString.append(((Element) alleleAssignment.getElementsByTagName(GLStringConstants.GL_STRING_ELEMENT).item(0)).getTextContent().trim());
}
glStrings.put(sampleId, glString.toString());

linkedGLStrings.add(inflateGenotypeList(sampleId, glString.toString(), null));
}
}
else {
String line;
String[] parts = null;
int lineNumber = 0;
String glString;
String id;
String note = null;

while ((line = reader.readLine()) != null) {
lineNumber++;

parts = line.split(FILE_DELIMITER_REGEX);

if (parts.length == 1) {
glStrings.put(filename + "-" + lineNumber, parts[0]);
} else if (parts.length == 2) {
glStrings.put(parts[0], parts[1]);
} else {
id = filename + "-" + (lineNumber - 1);
glString = parts[0];
} else if (parts.length >= 2) {
id = parts[0];
glString = parts[1];

if (parts.length == 3) note = parts[2];
}
else {
LOGGER.warning("Unexpected line format at line "
+ lineNumber + ": " + filename);
+ (lineNumber - 1) + ": " + filename);

continue;
}

lineNumber++;

linkedGLStrings.add(inflateGenotypeList(id, glString, note));

}
}

return linkedGLStrings;
}

/**
 * Builds a {@code LinkageDisequilibriumGenotypeList} for a single input record.
 *
 * The GL string is checked with {@code validateGLStringFormat}; if that check
 * fails it is rewritten with {@code fullyQualifyGLString} before being
 * converted to a {@code MultilocusUnphasedGenotype}. The text exactly as it
 * was passed in is stored on the result as the submitted GL string.
 *
 * @param id identifier handed to the genotype list constructor
 * @param glString GL string as read from the input
 * @param note note stored on the result via {@code setNote}; may be
 *        {@code null} (the XML/HML path in this file passes {@code null})
 * @return the populated genotype list
 */
private static LinkageDisequilibriumGenotypeList inflateGenotypeList(String id, String glString, String note) {
LinkageDisequilibriumGenotypeList linkedGLString;

// Capture the input before glString is possibly reassigned below.
String submittedGlString = glString;

if (!GLStringUtilities.validateGLStringFormat(glString)) {
glString = GLStringUtilities.fullyQualifyGLString(glString);
}

MultilocusUnphasedGenotype mug = GLStringUtilities.convertToMug(glString);
linkedGLString = new LinkageDisequilibriumGenotypeList(id, mug);

linkedGLString.setSubmittedGlString(submittedGlString);
linkedGLString.setNote(note);

// NOTE(review): glStrings is not declared in this method; the next line looks
// like a removed line from the diff rendering rather than live code - confirm.
return glStrings;
return linkedGLString;
}

public static MultilocusUnphasedGenotype convertToMug(String glString) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
public class LinkageDisequilibriumGenotypeList {
private String id;
private String glString;
private String note;
private String submittedGlString;

private MultilocusUnphasedGenotype mug;
Expand Down Expand Up @@ -105,6 +106,14 @@ public LinkageDisequilibriumGenotypeList(String id, MultilocusUnphasedGenotype m
}
}

/**
 * Returns the note attached to this genotype list.
 *
 * @return the current value of the {@code note} field
 */
public String getNote() {
return note;
}

/**
 * Stores a note on this genotype list, replacing any previous value.
 *
 * @param note the note to store
 */
public void setNote(String note) {
this.note = note;
}

/**
 * Returns the submitted GL string recorded for this genotype list.
 *
 * @return the current value of the {@code submittedGlString} field
 */
public String getSubmittedGlString() {
return submittedGlString;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
<xs:element name="processed-gl-string" type="xs:string" minOccurs="0"/>
</xs:sequence>
<xs:attribute name="id" type="xs:string"/>
<xs:attribute name="note" type="xs:string"/>
</xs:complexType>
</xs:element>
<xs:element name="gl-freq">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
package org.dash;

import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.List;

import org.dash.valid.HLALinkageDisequilibrium;
Expand All @@ -49,10 +48,10 @@ public void testLinkageReportingExamples() {

@Test
public void testLinkageReportingMugs() throws IOException {
LinkedHashMap<String, String> glStrings = GLStringUtilities.readGLStringFile("fullyQualifiedExample.txt");
List<LinkageDisequilibriumGenotypeList> glStrings = GLStringUtilities.readGLStringFile("fullyQualifiedExample.txt");

for (String key : glStrings.keySet()) {
MultilocusUnphasedGenotype mug = GLStringUtilities.convertToMug(glStrings.get(key));
for (LinkageDisequilibriumGenotypeList linkedGLString : glStrings) {
MultilocusUnphasedGenotype mug = GLStringUtilities.convertToMug(linkedGLString.getGLString());

assertNotNull(mug);

Expand Down
20 changes: 6 additions & 14 deletions ld-validation/src/test/java/org/dash/gl/GLStringTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.LinkedHashMap;
import java.util.Set;
import java.util.List;

import org.dash.valid.Locus;
import org.dash.valid.gl.GLStringUtilities;
Expand All @@ -47,20 +46,13 @@ public class GLStringTest extends TestCase {

@Before
public void setUp() throws IOException {
LinkedHashMap<String, String> validGLStrings = GLStringUtilities.readGLStringFile("fullyQualifiedExample.txt");
List<LinkageDisequilibriumGenotypeList> validGLStrings = GLStringUtilities.readGLStringFile("fullyQualifiedExample.txt");

glString = validGLStrings.get(0);

Set<String> keys = validGLStrings.keySet();
List<LinkageDisequilibriumGenotypeList> strictGLStrings = GLStringUtilities.readGLStringFile("strictExample.txt");

for (String key : keys) {
glString = new LinkageDisequilibriumGenotypeList(key, GLStringUtilities.fullyQualifyGLString(validGLStrings.get(key)));
}

LinkedHashMap<String, String> strictGLStrings = GLStringUtilities.readGLStringFile("strictExample.txt");

keys = strictGLStrings.keySet();
String key = keys.iterator().next();

STRICT_GL_STRING = strictGLStrings.get(key);
STRICT_GL_STRING = strictGLStrings.get(0).getGLString();
}

@Test
Expand Down
22 changes: 12 additions & 10 deletions ld-validation/src/test/java/org/dash/gl/GLStringUtilitiesTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@
package org.dash.gl;

import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Set;

import org.dash.valid.Locus;
import org.dash.valid.freq.HLAFrequenciesLoader;
import org.dash.valid.gl.GLStringConstants;
import org.dash.valid.gl.GLStringUtilities;
import org.dash.valid.gl.LinkageDisequilibriumGenotypeList;
import org.junit.Test;
import org.nmdp.gl.MultilocusUnphasedGenotype;

Expand All @@ -56,6 +56,7 @@ public class GLStringUtilitiesTest extends TestCase {
private static final String VALID_GL_STRING_MAC = "HLA-A*01:01/HLA-A*01:02+HLA-A*26:01^HLA-C*01:01/HLA-C*01:03+HLA-C*04:01";
private static final String TAB_DELIMITED = "TAB_DELIMITED";
private static final String COMMA_DELIMITED = "COMMA_DELIMITED";
private static final String MY_NOTE = "My Note";

@Test
public void testParse() {
Expand All @@ -79,25 +80,26 @@ public void testHasFrequency() throws IOException {

@Test
public void testTabDelimitedGLStringFile() {
LinkedHashMap<String, String> glStrings = GLStringUtilities.readGLStringFile("tabDelimitedExample.txt");
for (String key : glStrings.keySet()) {
assertTrue(TAB_DELIMITED.equals(key));
List<LinkageDisequilibriumGenotypeList> glStrings = GLStringUtilities.readGLStringFile("tabDelimitedExample.txt");
for (LinkageDisequilibriumGenotypeList linkedGLString : glStrings) {
assertTrue(TAB_DELIMITED.equals(linkedGLString.getId()));
assertTrue(MY_NOTE.equals(linkedGLString.getNote()));
}
}

@Test
public void testCommaDelimitedGLStringFile() {
LinkedHashMap<String, String> glStrings = GLStringUtilities.readGLStringFile("commaDelimitedExample.txt");
for (String key : glStrings.keySet()) {
assertTrue(COMMA_DELIMITED.equals(key));
List<LinkageDisequilibriumGenotypeList> glStrings = GLStringUtilities.readGLStringFile("commaDelimitedExample.txt");
for (LinkageDisequilibriumGenotypeList linkedGLString : glStrings) {
assertTrue(COMMA_DELIMITED.equals(linkedGLString.getId()));
}
}

@Test
public void testHMLFile() {
LinkedHashMap<String, String> glStrings = GLStringUtilities.readGLStringFile("hml_1_0_2-example7-ngsFull.xml");
for (String key : glStrings.keySet()) {
assertTrue("1367-7150-8".equals(key));
List<LinkageDisequilibriumGenotypeList> glStrings = GLStringUtilities.readGLStringFile("hml_1_0_2-example7-ngsFull.xml");
for (LinkageDisequilibriumGenotypeList linkedGLString : glStrings) {
assertTrue("1367-7150-8".equals(linkedGLString.getId()));
}
}

Expand Down
2 changes: 1 addition & 1 deletion ld-validation/src/test/resources/tabDelimitedExample.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
TAB_DELIMITED HLA-A*01:01:01:01+HLA-A*26:01:01^HLA-B*38:01:01/HLA-B*38:27+HLA-B*44:03:01/HLA-B*44:03:10/HLA-B*44:125^HLA-C*04:01:01:01/HLA-C*04:01:01:02/HLA-C*04:01:01:03/HLA-C*04:01:01:04/HLA-C*04:01:01:05/HLA-C*04:20/HLA-C*04:117+HLA-C*12:03:01:01/HLA-C*12:03:01:02/HLA-C*12:34^HLA-DPA1*01:03:01:01/HLA-DPA1*01:03:01:02/HLA-DPA1*01:03:01:03/HLA-DPA1*01:03:01:04/HLA-DPA1*01:03:01:05+HLA-DPA1*01:03:01:01/HLA-DPA1*01:03:01:02/HLA-DPA1*01:03:01:03/HLA-DPA1*01:03:01:04/HLA-DPA1*01:03:01:05^HLA-DPB1*04:01:01:01/HLA-DPB1*04:01:01:02+HLA-DPB1*04:01:01:01/HLA-DPB1*04:01:01:02^HLA-DQA1*02:01+HLA-DQA1*05:05:01:01/HLA-DQA1*05:05:01:02/HLA-DQA1*05:05:01:03/HLA-DQA1*05:09/HLA-DQA1*05:11^HLA-DQB1*02:02+HLA-DQB1*03:01:01:01/HLA-DQB1*03:01:01:02/HLA-DQB1*03:01:01:03^HLA-DRB1*07:01:01:01/HLA-DRB1*07:01:01:02+HLA-DRB1*11:01:01^HLA-DRB3*02:02:01:01/HLA-DRB3*02:02:01:02^HLA-DRB4*01:01:01:01/HLA-DRB4*03:01N
TAB_DELIMITED HLA-A*01:01:01:01+HLA-A*26:01:01^HLA-B*38:01:01/HLA-B*38:27+HLA-B*44:03:01/HLA-B*44:03:10/HLA-B*44:125^HLA-C*04:01:01:01/HLA-C*04:01:01:02/HLA-C*04:01:01:03/HLA-C*04:01:01:04/HLA-C*04:01:01:05/HLA-C*04:20/HLA-C*04:117+HLA-C*12:03:01:01/HLA-C*12:03:01:02/HLA-C*12:34^HLA-DPA1*01:03:01:01/HLA-DPA1*01:03:01:02/HLA-DPA1*01:03:01:03/HLA-DPA1*01:03:01:04/HLA-DPA1*01:03:01:05+HLA-DPA1*01:03:01:01/HLA-DPA1*01:03:01:02/HLA-DPA1*01:03:01:03/HLA-DPA1*01:03:01:04/HLA-DPA1*01:03:01:05^HLA-DPB1*04:01:01:01/HLA-DPB1*04:01:01:02+HLA-DPB1*04:01:01:01/HLA-DPB1*04:01:01:02^HLA-DQA1*02:01+HLA-DQA1*05:05:01:01/HLA-DQA1*05:05:01:02/HLA-DQA1*05:05:01:03/HLA-DQA1*05:09/HLA-DQA1*05:11^HLA-DQB1*02:02+HLA-DQB1*03:01:01:01/HLA-DQB1*03:01:01:02/HLA-DQB1*03:01:01:03^HLA-DRB1*07:01:01:01/HLA-DRB1*07:01:01:02+HLA-DRB1*11:01:01^HLA-DRB3*02:02:01:01/HLA-DRB3*02:02:01:02^HLA-DRB4*01:01:01:01/HLA-DRB4*03:01N My Note

0 comments on commit 9429c1c

Please sign in to comment.