Skip to content

Commit

Permalink
Fix filter file reading
Browse files Browse the repository at this point in the history
  • Loading branch information
konstantinberlin committed Jan 19, 2015
1 parent 3c615fa commit aaf4927
Show file tree
Hide file tree
Showing 6 changed files with 21 additions and 20 deletions.
10 changes: 6 additions & 4 deletions src/main/java/edu/umd/marbl/mhap/main/MhapMain.java
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public final class MhapMain
{
private final double acceptScore;

private final HashSet<Integer> filter;
private final HashSet<Long> filter;

private final String inFile;

Expand Down Expand Up @@ -289,9 +289,6 @@ public MhapMain(ParseOptions options) throws IOException
this.minStoreLength = options.get("--min-store-length").getInteger();
this.maxShift = options.get("--max-shift").getDouble();
this.acceptScore = options.get("--threshold").getDouble();

this.kmerCounter = recordFastaKmerCounts(inFile, options.get("--filter-threshold").getDouble());
//this.kmerCounter = null;

// read in the kmer filter set
String filterFile = options.get("-f").getString();
Expand All @@ -309,9 +306,14 @@ public MhapMain(ParseOptions options) throws IOException
throw new MhapRuntimeException("Could not parse k-mer filter file.", e);
}
System.err.println("Time (s) to read filter file: " + (System.nanoTime() - startTime) * 1.0e-9);

this.kmerCounter = null;
}
else
{
this.filter = null;
this.kmerCounter = recordFastaKmerCounts(inFile, options.get("--filter-threshold").getDouble());
}

}

Expand Down
4 changes: 2 additions & 2 deletions src/main/java/edu/umd/marbl/mhap/sketch/MinHash.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public final class MinHash implements Serializable


public final static int[] computeKmerMinHashesWeightedIntSuper(String seq, final int kmerSize, final int numHashes,
HashSet<Integer> filter, KmerCounts kmerCount)
HashSet<Long> filter, KmerCounts kmerCount)
{
final int numberKmers = seq.length() - kmerSize + 1;

Expand Down Expand Up @@ -241,7 +241,7 @@ private MinHash(int seqLength, int[] minHashes)
this.minHashes = minHashes;
}

public MinHash(Sequence seq, int kmerSize, int numHashes, HashSet<Integer> filter, KmerCounts kmerCount)
public MinHash(Sequence seq, int kmerSize, int numHashes, HashSet<Long> filter, KmerCounts kmerCount)
{
this.seqLength = seq.length();

Expand Down
10 changes: 5 additions & 5 deletions src/main/java/edu/umd/marbl/mhap/sketch/MinHashSearch.java
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ public boolean addSequence(SequenceSketch currHash)
public List<MatchResult> findMatches(SequenceSketch seqHashes, boolean toSelf)
{
//for performance reasons might need to change
//long startTime = System.nanoTime();
long startTime = System.nanoTime();

MinHash minHash = seqHashes.getMinHashes();

Expand Down Expand Up @@ -204,8 +204,8 @@ public List<MatchResult> findMatches(SequenceSketch seqHashes, boolean toSelf)
}

//record the search time
//long minHashEndTime = System.nanoTime();
//this.minhashSearchTime.getAndAdd(minHashEndTime - startTime);
long minHashEndTime = System.nanoTime();
this.minhashSearchTime.getAndAdd(minHashEndTime - startTime);

//record number of hash matches processed
this.numberSequencesHit.getAndAdd(bestSequenceHit.size());
Expand Down Expand Up @@ -269,8 +269,8 @@ public List<MatchResult> findMatches(SequenceSketch seqHashes, boolean toSelf)

//record the search time
//TODO not clear why not working. Perhaps everything is too fast?
//long endTime = System.nanoTime();
//this.sortMergeSearchTime.getAndAdd(endTime-minHashEndTime);
long endTime = System.nanoTime();
this.sortMergeSearchTime.getAndAdd(endTime-minHashEndTime);

return matches;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ public SequenceSketch(SequenceId id, MinHash mainHashes, OrderKmerHashes ordered
}

public SequenceSketch(Sequence seq, int kmerSize, int numHashes, int orderedKmerSize,
boolean storeHashes, HashSet<Integer> filter, KmerCounts kmerCount)
boolean storeHashes, HashSet<Long> filter, KmerCounts kmerCount)
{
this.id = seq.getId();
this.mainHashes = new MinHash(seq, kmerSize, numHashes, filter, kmerCount);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public class SequenceSketchStreamer
{
private final DataInputStream buffInput;
private final FastaData fastaData;
private final HashSet<Integer> filter;
private final HashSet<Long> filter;
private final KmerCounts kmerCounter;
private final int kmerSize;
private final AtomicLong numberProcessed;
Expand Down Expand Up @@ -91,7 +91,7 @@ public SequenceSketchStreamer(String file, int offset) throws FileNotFoundExcept
}

public SequenceSketchStreamer(String file, int kmerSize, int numHashes, int subSequenceSize, int orderedKmerSize,
HashSet<Integer> filter, KmerCounts kmerCounter, int offset) throws IOException
HashSet<Long> filter, KmerCounts kmerCounter, int offset) throws IOException
{
this.fastaData = new FastaData(file, offset);
this.readingFasta = true;
Expand Down
11 changes: 5 additions & 6 deletions src/main/java/edu/umd/marbl/mhap/utils/Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@
import com.google.common.hash.Hashing;

import edu.umd.marbl.mhap.general.Sequence;
import edu.umd.marbl.mhap.sketch.MinHash;

public final class Utils
{
Expand Down Expand Up @@ -460,7 +459,7 @@ public final static int countLetterInRead(String fasta, String letter, Boolean c
return count;
}

public final static HashSet<Integer> createKmerFilter(String fileName, double maxPercent, int kmerSize)
public final static HashSet<Long> createKmerFilter(String fileName, double maxPercent, int kmerSize)
throws IOException
{
File file = new File(fileName);
Expand All @@ -469,7 +468,7 @@ public final static HashSet<Integer> createKmerFilter(String fileName, double ma
try (BufferedReader bf = new BufferedReader(new FileReader(file), BUFFER_BYTE_SIZE);)
{
// generate hashset
ArrayList<Integer> filterArray = new ArrayList<Integer>();
ArrayList<Long> filterArray = new ArrayList<Long>();

String line = bf.readLine();
while (line != null)
Expand All @@ -485,12 +484,12 @@ public final static HashSet<Integer> createKmerFilter(String fileName, double ma
// if greater, add to hashset
if (percent > maxPercent)
{
int[] minHash = MinHash.computeKmerMinHashes(str[0], kmerSize, 0, null);
long[] minHash = Utils.computeSequenceHashesLong(str[0], kmerSize);

if (minHash.length > 1)
System.err.println("Warning filter file kmer size larger than setting!");

for (int val : minHash)
for (long val : minHash)
filterArray.add(val);
}
else
Expand All @@ -499,7 +498,7 @@ public final static HashSet<Integer> createKmerFilter(String fileName, double ma
// read the next line
line = bf.readLine();
}
return new HashSet<Integer>(filterArray);
return new HashSet<Long>(filterArray);
}
}

Expand Down

0 comments on commit aaf4927

Please sign in to comment.