Skip to content

Commit

Permalink
Merge pull request #6 from ppillot/kbit-distance
Browse files Browse the repository at this point in the history
Kbit distance with Simple Matching distance
  • Loading branch information
ppillot authored Jan 6, 2024
2 parents 571c18f + b3d2b92 commit 36fad6b
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 14 deletions.
2 changes: 1 addition & 1 deletion rollup.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ export default {
compress: {ecma: 2015, passes: 3, unsafe: true},
mangle: {
properties: {
reserved: ['biomsa']
reserved: ['biomsa', 'gapopen', 'gapextend', 'matrix', 'method', 'type', 'gapchar', 'debug']
}
},
nameCache: {}
Expand Down
27 changes: 14 additions & 13 deletions src/sequence/sequence.ts
Original file line number Diff line number Diff line change
Expand Up @@ -261,8 +261,10 @@ export function distanceMatrix(tabSeq: TSequence[]) {
// Here, for reasons of computational speed, we compute binary matchings: each
// kmer value is associated with a bit in a BitSet. Intersection size between
// bitsets increases with sequence proximity.
// The distance is computed as a Tanimoto distance between the BitSets of 2
// sequences.
// The distance is computed as a Simple Matching Distance between the
// BitSets of 2 sequences. Compared to a Tanimoto distance, it also counts
// bits that are unset (0) in both BitSets as matches, which disfavours
// comparisons between sequences of different sizes.
// When comparing several sequences between each other, if the sequences have
// a noticeable variety of sizes, longer sequences will tend to have more
// matches than shorter ones.
Expand All @@ -277,7 +279,7 @@ export function distanceMatrix(tabSeq: TSequence[]) {
const l = tabSeq.length;
const distTab: number[][] = tabSeq.map(() => []);
let lKmerI: BitArray;
let lDistance: number;
let distance: number;
let kbitsICount: number;
let kbitsJCount: number;
let commonKbitsCount: number;
Expand All @@ -291,25 +293,24 @@ export function distanceMatrix(tabSeq: TSequence[]) {
backgroundMatchingProbability = computeBackgroundKmerMatch(kbitsICount, bitsetLength)

for (let j = i + 1; j < l; j++) {
commonKbitsCount = lKmerI.getIntersectionSize(lKmer[j])
kbitsJCount = lKmer[j].getSize()
commonKbitsCount = lKmerI.getIntersectionSize(lKmer[j])
commonKbitsCount += bitsetLength - (kbitsICount + kbitsJCount - commonKbitsCount)
expectedRandomMatches = Math.ceil(backgroundMatchingProbability * kbitsJCount);
commonKbitsCount -= expectedRandomMatches;
commonKbitsCount = Math.max(commonKbitsCount, 0)

// Tanimoto/Jaccard distance corrected for random matches
lDistance = 1 - (
commonKbitsCount / (
kbitsICount - expectedRandomMatches
+ kbitsJCount - expectedRandomMatches
- commonKbitsCount
)
);
distTab[j][i] = distTab[i][j] = Math.max(lDistance, 0);
distance = 1 - (commonKbitsCount / bitsetLength);
distTab[j][i] = distTab[i][j] = distance;
}

}

if (DEBUG) Log.add('K-mer distance computation');
if (DEBUG) {
Log.add('K-mer distance computation');
console.table(distTab)
}
return distTab;
}

Expand Down

0 comments on commit 36fad6b

Please sign in to comment.