Skip to content

Commit

Permalink
Fixed CWD logic and now makes use of IMGT files automatically
Browse files Browse the repository at this point in the history
  • Loading branch information
mpresteg committed May 29, 2017
1 parent d3e901c commit 845c275
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 119 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@ public static AntigenRecognitionSiteLoader getInstance() throws IOException, Inv
catch (IOException | ParserConfigurationException | SAXException e) {
LOGGER.info("Couldn't find IMGT file in the correct format for hladb: " + hladb);
instance.init();
System.setProperty(GLStringConstants.HLADB_PROPERTY, "Default");

// TODO: Make final determination - uncommenting this line currently breaks the CWD logic
//System.setProperty(GLStringConstants.HLADB_PROPERTY, "Default");
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,42 +25,32 @@
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.net.URL;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.logging.Logger;
import java.util.zip.ZipInputStream;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.dash.valid.gl.GLStringConstants;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

public class CommonWellDocumentedLoader {
private static final Logger LOGGER = Logger.getLogger(CommonWellDocumentedLoader.class.getName());

private static final String NOT_APPLICABLE = "NA";
private static CommonWellDocumentedLoader instance = null;

private Set<String> cwdAlleles;

private HashMap<String, String> cwdByAccession;
private HashMap<String, List<String>> hlaDbByAccession;

/**
 * Returns the map stored in this loader's {@code cwdByAccession} field.
 *
 * @return the current {@code cwdByAccession} map, exactly as stored (no copy is made)
 */
public HashMap<String, String> getCwdByAccession() {
    return this.cwdByAccession;
}

/**
 * Replaces the map held in this loader's {@code cwdByAccession} field.
 *
 * @param cwdByAccession the map to store; the reference is kept as-is (no copy is made)
 */
private void setCwdByAccession(final HashMap<String, String> cwdByAccession) {
    this.cwdByAccession = cwdByAccession;
}

/**
 * Returns the map stored in this loader's {@code hlaDbByAccession} field.
 *
 * @return the current {@code hlaDbByAccession} map, exactly as stored (no copy is made)
 */
public HashMap<String, List<String>> getHlaDbByAccession() {
    return this.hlaDbByAccession;
}

/**
 * Replaces the map held in this loader's {@code hlaDbByAccession} field.
 *
 * @param hlaDbByAccession the map to store; the reference is kept as-is (no copy is made)
 */
private void setHlaDbByAccession(final HashMap<String, List<String>> hlaDbByAccession) {
    this.hlaDbByAccession = hlaDbByAccession;
}
private HashMap<String, String> accessionMap;

private CommonWellDocumentedLoader(String hladb) {
init(hladb);
Expand All @@ -85,29 +75,67 @@ private void init(String hladb) {

}

public void loadCommonWellDocumentedAlleles(String hladb) throws IOException, FileNotFoundException {
String filename = "reference/CWD.txt";
/**
 * Downloads the zipped {@code hla.xml} for an HLA database release from the ANHIG/IMGTHLA
 * GitHub repository and builds a lookup from allele name to allele id.
 *
 * <p>The release string has every {@code GLStringConstants.PERIOD} removed before it is used
 * as the path segment of the download URL. Each {@code allele} element in the document
 * contributes one entry: its {@code name} attribute is the key and its {@code id} attribute
 * is the value.
 *
 * @param hladb the HLA database release to load; when {@code null},
 *              {@code GLStringConstants.LATEST_HLADB} is used
 * @return a new map of allele {@code name} to allele {@code id}
 * @throws IOException if the archive cannot be downloaded or read, or contains no entry
 * @throws ParserConfigurationException if an XML parser cannot be created
 * @throws SAXException if the downloaded document is not well-formed XML
 */
public HashMap<String, String> loadFromIMGT(String hladb) throws IOException, ParserConfigurationException, SAXException {
    if (hladb == null) {
        hladb = GLStringConstants.LATEST_HLADB;
    }

    URL url = new URL("https://raw.githubusercontent.com/ANHIG/IMGTHLA/"
            + hladb.replace(GLStringConstants.PERIOD, GLStringConstants.EMPTY_STRING)
            + "/xml/hla.xml.zip");

    HashMap<String, String> accessionMap = new HashMap<String, String>();

    // try-with-resources guarantees the network stream is released on every path,
    // including parse failures.
    try (ZipInputStream zipStream = new ZipInputStream(url.openStream())) {
        // Position the stream on the first archive entry; a null result means the
        // archive is empty and there is nothing to parse.
        if (zipStream.getNextEntry() == null) {
            throw new IOException("No entry found in IMGT archive: " + url);
        }

        // NOTE(review): the parser is created with default settings; consider disabling
        // DTDs/external entities if this source is ever treated as untrusted.
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();

        // Hand the parser the raw bytes so it honours the encoding declared in the XML
        // itself instead of decoding with the platform default charset.
        Document doc = builder.parse(new InputSource(zipStream));

        NodeList nList = doc.getElementsByTagName("allele");
        for (int i = 0; i < nList.getLength(); i++) {
            String name = nList.item(i).getAttributes().getNamedItem("name").getNodeValue();
            String accession = nList.item(i).getAttributes().getNamedItem("id").getNodeValue();
            accessionMap.put(name, accession);
        }
    }

    return accessionMap;
}

public void loadCommonWellDocumentedAlleles(String hladb) throws IOException, FileNotFoundException {
Set<String> cwdSet = new HashSet<String>();
HashMap<String, String> accessionMap = null;
boolean accessionLoaded = false;

if (hladb == null) hladb = GLStringConstants.LATEST_HLADB;

try {
accessionMap = loadFromIMGT(hladb);
}
catch (IOException | ParserConfigurationException | SAXException e) {
LOGGER.info("Could not load file from IMGT for hladb: " + hladb);
}

HashMap<String, String> cwdByAccession = new HashMap<String, String>();
HashMap<String, List<String>> hlaDbByAccession = new HashMap<String, List<String>>();
List<String> hladbs;
Set<String> cwdSet = new HashSet<String>();
if (accessionMap != null && accessionMap.size() > 0) {
accessionLoaded = true;
}
else {
accessionMap = new HashMap<String, String>();
}

String filename = "reference/CWD.txt";

BufferedReader reader = new BufferedReader(new InputStreamReader(CommonWellDocumentedLoader.class.getClassLoader().getResourceAsStream(filename)));
String row;
String[] columns;
List<String> headers = null;
int idx = 0;

int hladbIdx = -1;

List<String> headers = null;

while ((row = reader.readLine()) != null) {
hladbs = new ArrayList<String>();
columns = row.split(GLStringConstants.TAB);
cwdByAccession.put(columns[0], GLStringConstants.HLA_DASH + columns[1]);


if (idx < 1) {
headers = Arrays.asList(columns);

Expand All @@ -119,22 +147,17 @@ public void loadCommonWellDocumentedAlleles(String hladb) throws IOException, Fi
}
}
else {
cwdSet.add(GLStringConstants.HLA_DASH + columns[hladbIdx]);
}

for (int i=0;i<columns.length-1;i++) {
if (!columns[i+1].equals(NOT_APPLICABLE)) {
hladbs.add(headers.get(i+1));
cwdSet.add(columns[0]);
if (!accessionLoaded) {
accessionMap.put(GLStringConstants.HLA_DASH + columns[hladbIdx], columns[0]);
}
}

hlaDbByAccession.put(columns[0], hladbs);
idx++;
}

setHlaDbByAccession(hlaDbByAccession);
setCwdByAccession(cwdByAccession);
setCwdAlleles(cwdSet);
setAccessionMap(accessionMap);

reader.close();
}
Expand All @@ -147,21 +170,11 @@ private void setCwdAlleles(Set<String> cwdAlleles) {
this.cwdAlleles = cwdAlleles;
}

public String getAccessionByAllele(String allele) {
if (!getCwdByAccession().containsValue(allele)) {
return null;
}

for (String key : getCwdByAccession().keySet()) {
if (getCwdByAccession().get(key).equals(allele)) {
return key;
}
}

return null;
public HashMap<String, String> getAccessionMap() {
return this.accessionMap;
}

public List<String> getHlaDbsByAccession(String accession) {
return getHlaDbByAccession().get(accession);
private void setAccessionMap(HashMap<String, String> accessionMap) {
this.accessionMap = accessionMap;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -166,44 +166,27 @@ public static boolean validateGLStringFormat(String glString) {

public static Set<String> checkCommonWellDocumented(String glString) {
Set<String> notCommon = new HashSet<String>();

CommonWellDocumentedLoader loader = CommonWellDocumentedLoader.getInstance();

HashMap<String, String> accessionMap = loader.getAccessionMap();

Set<String> cwdAlleles = CommonWellDocumentedLoader.getInstance()
.getCwdAlleles();
Set<String> cwdAlleles = loader.getCwdAlleles();

StringTokenizer st = new StringTokenizer(glString,
GL_STRING_DELIMITER_REGEX);
String token;
while (st.hasMoreTokens()) {
token = st.nextToken();

if (!checkCommonWellDocumented(cwdAlleles, token)) {
if (!cwdAlleles.contains(accessionMap.get(token))) {
notCommon.add(token);
}

}

return notCommon;
}

/**
 * Reports whether the given allele is a member of the supplied CWD allele set.
 *
 * @param cwdAlleles the set of CWD alleles to search; must not be {@code null}
 * @param allele the allele to look up
 * @return {@code true} if {@code cwdAlleles} contains {@code allele}, {@code false} otherwise
 */
private static boolean checkCommonWellDocumented(Set<String> cwdAlleles,
        String allele) {
    // Set.contains is already an equals-based membership test, so a follow-up linear
    // scan comparing each element with equals() can never find anything new (and
    // would throw on a null allele).
    return cwdAlleles.contains(allele);
}

public static boolean fieldLevelComparison(String allele,
String referenceAllele) {
if (allele == null || referenceAllele == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import java.io.IOException;
import java.util.logging.Logger;

import org.dash.valid.cwd.CommonWellDocumentedLoader;
import org.dash.valid.gl.GLStringConstants;
import org.dash.valid.handler.CommonWellDocumentedFileHandler;

Expand Down Expand Up @@ -63,18 +62,13 @@ public static String formatCommonWellDocumented(
StringBuffer sb = new StringBuffer("Id: " + findings.getGLId() + GLStringConstants.NEWLINE + "GL String: " + findings.getGLString());
sb.append(GLStringConstants.NEWLINE + GLStringConstants.NEWLINE + "HLA DB Version: " + findings.getHladb() + GLStringConstants.NEWLINE);

CommonWellDocumentedLoader loader = CommonWellDocumentedLoader.getInstance();
String accession;

for (String allele : findings.getNonCWDAlleles()) {
sb.append("WARNING - Allele: " + allele + " not in the CWD list for HLA DB: " + findings.getHladb());
accession = loader.getAccessionByAllele(allele);
if (accession != null) {
sb.append(" (Found under accession: " + accession + " in these HLA DBs: " +
loader.getHlaDbsByAccession(accession) + ")");
}
sb.append("WARNING - Allele: " + allele + " not in the CWD list. ");
sb.append(GLStringConstants.NEWLINE);
}

sb.append(GLStringConstants.NEWLINE);

return sb.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,7 @@
*/
package org.dash;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.ProtocolException;
import java.net.URL;
import java.util.LinkedHashMap;
import java.util.List;

Expand All @@ -42,7 +35,6 @@
import org.dash.valid.gl.LinkageDisequilibriumGenotypeList;
import org.dash.valid.gl.haplo.Haplotype;
import org.dash.valid.report.DetectedLinkageFindings;
import org.junit.Ignore;
import org.junit.Test;
import org.nmdp.gl.MultilocusUnphasedGenotype;

Expand Down Expand Up @@ -110,15 +102,11 @@ public void testPhasedGenotypeList() throws IOException {

@Test
public void testLinkageReportingInlineGLString() throws IOException {
//String fullyQualified = GLStringUtilities.fullyQualifyGLString("HLA-A*11:01:01+HLA-A*24:02:01:01/HLA-A*24:02:01:02L/HLA-A*24:02:01:03^HLA-B*18:01:01:01/HLA-B*18:01:01:02/HLA-B*18:51+HLA-B*53:01:01^HLA-C*04:01:01:01/HLA-C*04:01:01:02/HLA-C*04:01:01:03/HLA-C*04:01:01:04/HLA-C*04:01:01:05/HLA-C*04:20/HLA-C*04:117+HLA-C*12:03:01:01/HLA-C*12:03:01:02/HLA-C*12:34^HLA-DPA1*01:03:01:01/HLA-DPA1*01:03:01:02/HLA-DPA1*01:03:01:03/HLA-DPA1*01:03:01:04/HLA-DPA1*01:03:01:05+HLA-DPA1*02:01:01^HLA-DPB1*02:01:02+HLA-DPB1*09:01^HLA-DQA1*01:02:01:01/HLA-DQA1*01:02:01:02/HLA-DQA1*01:02:01:03/HLA-DQA1*01:02:01:04/HLA-DQA1*01:11+HLA-DQA1*03:01:01^HLA-DQB1*03:05:01+HLA-DQB1*06:09^HLA-DRB1*11:04:01+HLA-DRB1*13:02:01^HLA-DRB3*02:02:01:01/HLA-DRB3*02:02:01:02+HLA-DRB3*03:01:01");

String fullyQualified = GLStringUtilities.fullyQualifyGLString("HLA-A*24:02:01:01~HLA-C*04:01:01:06~HLA-B*35:02:01~HLA-DRB3*02:02:01:02~HLA-DRB1*11:01:01:01~HLA-DQA1*05:05:01:01/HLA-DQA1*05:05:01:02~HLA-DQB1*03:01:01:03~HLA-DPA1*01:03:01:01~HLA-DPB1*05:01:01+HLA-A*11:01:01:01~HLA-C*12:03:01:01~HLA-B*35:03:01~HLA-DRB3*02:02:01:01~HLA-DRB1*13:01:01:01/HLA-DRB1*13:01:01:02~HLA-DQA1*01:03:01:02~HLA-DQB1*06:03:01~HLA-DPA1*02:01:01:01~HLA-DPB1*13:01:01/HLA-DPB1*107:01");
LinkageDisequilibriumGenotypeList genotypeList = new LinkageDisequilibriumGenotypeList("SBCFMW0003", fullyQualified);
//MultilocusUnphasedGenotype mug = GLStringUtilities.convertToMug(fullyQualified);
//DetectedLinkageFindings findings = LinkageDisequilibriumAnalyzer.detectLinkages(mug);

DetectedLinkageFindings findings = LinkageDisequilibriumAnalyzer.detectLinkages(genotypeList);
String fullyQualified = GLStringUtilities.fullyQualifyGLString("HLA-A*11:01:01+HLA-A*24:02:01:01/HLA-A*24:02:01:02L/HLA-A*24:02:01:03^HLA-B*18:01:01:01/HLA-B*18:01:01:02/HLA-B*18:51+HLA-B*53:01:01^HLA-C*04:01:01:01/HLA-C*04:01:01:02/HLA-C*04:01:01:03/HLA-C*04:01:01:04/HLA-C*04:01:01:05/HLA-C*04:20/HLA-C*04:117+HLA-C*12:03:01:01/HLA-C*12:03:01:02/HLA-C*12:34^HLA-DPA1*01:03:01:01/HLA-DPA1*01:03:01:02/HLA-DPA1*01:03:01:03/HLA-DPA1*01:03:01:04/HLA-DPA1*01:03:01:05+HLA-DPA1*02:01:01^HLA-DPB1*02:01:02+HLA-DPB1*09:01^HLA-DQA1*01:02:01:01/HLA-DQA1*01:02:01:02/HLA-DQA1*01:02:01:03/HLA-DQA1*01:02:01:04/HLA-DQA1*01:11+HLA-DQA1*03:01:01^HLA-DQB1*03:05:01+HLA-DQB1*06:09^HLA-DRB1*11:04:01+HLA-DRB1*13:02:01^HLA-DRB3*02:02:01:01/HLA-DRB3*02:02:01:02+HLA-DRB3*03:01:01");

MultilocusUnphasedGenotype mug = GLStringUtilities.convertToMug(fullyQualified);
DetectedLinkageFindings findings = LinkageDisequilibriumAnalyzer.detectLinkages(mug);

assertNotNull(findings);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,11 @@

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.List;

import junit.framework.TestCase;

import org.junit.Test;

import junit.framework.TestCase;

public class CommonWellDocumentedLoaderTest extends TestCase {
private static final String DQA10111 = "HLA-DQA1*01:11";
private static final String HLA08433 = "HLA08433";
Expand All @@ -42,16 +41,8 @@ public void test() {
@Test
public void testLoadAllCWD() throws FileNotFoundException, IOException {
CommonWellDocumentedLoader cwdLoader = CommonWellDocumentedLoader.getInstance();
List<String> hladbs;

assertTrue(cwdLoader.getCwdByAccession().containsValue(DQA10111));
for (String key : cwdLoader.getCwdByAccession().keySet()) {
if (cwdLoader.getCwdByAccession().get(key).equals(DQA10111)) {
assertTrue(key.equals(HLA08433));
hladbs = cwdLoader.getHlaDbByAccession().get(key);
assertNotNull(hladbs);
break;
}
}
assertTrue(cwdLoader.getAccessionMap().containsKey(DQA10111));
assertTrue(cwdLoader.getAccessionMap().get(DQA10111).equals(HLA08433));
}
}

0 comments on commit 845c275

Please sign in to comment.