Skip to content

Commit

Permalink
bug fixes and code cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
konstantinberlin committed May 13, 2015
1 parent 144ac68 commit c89d035
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 5 deletions.
Binary file added lib/commons-compress-1.9.jar
Binary file not shown.
Binary file added lib/fastutil-7.0.2.jar
Binary file not shown.
10 changes: 9 additions & 1 deletion src/main/java/edu/umd/marbl/mhap/sketch/MinHashSketch.java
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,15 @@ private final static int[] computeNgramMinHashesWeighted(String seq, final int n
{
if (weighted)
{
weight = (int)(((double)weight)*kmerFilter.idfDiscrete(key, 3));
//compute the td part
double td = (double)weight;
//td = Math.log1p(td)*3.4;

//compute the idf part
double idf = kmerFilter.idfDiscrete(key, 3);

//compute td-idf
weight = (int)Math.round(td*idf);
if (weight<1)
weight = 1;
}
Expand Down
37 changes: 33 additions & 4 deletions src/main/java/edu/umd/marbl/mhap/sketch/OrderedNGramHashes.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
*/
package edu.umd.marbl.mhap.sketch;

import it.unimi.dsi.fastutil.ints.IntArrays;

import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
Expand Down Expand Up @@ -62,6 +64,16 @@ public int compareTo(SortableIntPair p)
{
return Integer.compare(this.x, p.x);
}

/**
 * Formats this pair for display as {@code [x, y]}.
 *
 * @return the two fields wrapped in square brackets and separated by a comma and a space
 * @see java.lang.Object#toString()
 */
@Override
public String toString()
{
	StringBuilder text = new StringBuilder();
	text.append("[").append(x).append(", ").append(y).append("]");
	return text.toString();
}

}

private final int[][] orderedHashes;
Expand Down Expand Up @@ -176,17 +188,34 @@ private int[][] getFullHashes(String seq, int subKmerSize)
if (val <= cutoff)
count++;

SortableIntPair[] completeHashAsPair = new SortableIntPair[count];
int[] cutHashes = new int[count];
int[] perm = new int[count];
int[] pos = new int[count];

count = 0;
for (int iter = 0; iter < hashes.length; iter++)
if (hashes[iter] <= cutoff)
{
completeHashAsPair[count] = new SortableIntPair(hashes[iter], iter);
cutHashes[count] = hashes[iter];
perm[count] = count;
pos[count] = iter;

count++;
}

//sort the array
IntArrays.radixSortIndirect(perm, cutHashes, true);

// sort the results, sort is in place so no need to look at second
Arrays.sort(completeHashAsPair);
SortableIntPair[] completeHashAsPair = new SortableIntPair[count];
for (int iter=0; iter<count; iter++)
{
int index = perm[iter];
completeHashAsPair[iter] = new SortableIntPair(cutHashes[index], pos[index]);
}

//System.err.println(Arrays.toString(completeHashAsPair));
// sort the results, sort in place so no need to look at second
//Arrays.sort(completeHashAsPair);

return storeAsArray(completeHashAsPair);
}
Expand Down

0 comments on commit c89d035

Please sign in to comment.