Skip to content

Commit

Permalink
Add robin_hood maps
Browse files Browse the repository at this point in the history
  • Loading branch information
morispi committed Jun 21, 2020
1 parent 5e48429 commit b6f8148
Show file tree
Hide file tree
Showing 22 changed files with 2,405 additions and 50 deletions.
2 changes: 1 addition & 1 deletion BMEAN
Submodule BMEAN updated 3 files
+13 −12 bmean.cpp
+2 −2 bmean.h
+2,286 −0 robin_hood.h
8 changes: 4 additions & 4 deletions src/CONSENT-correction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,22 @@
#include "../CTPL/ctpl_stl.h"

std::mutex outMtx;
std::unordered_map<std::string, std::vector<bool>> readIndex;
robin_hood::unordered_map<std::string, std::vector<bool>> readIndex;
bool doTrimRead = true;

std::pair<std::string, std::string> processRead(int id, std::vector<Overlap>& alignments, unsigned minSupport, unsigned maxSupport, unsigned windowSize, unsigned merSize, unsigned commonKMers, unsigned minAnchors,unsigned solidThresh, unsigned windowOverlap, unsigned maxMSA, std::string path) {
std::string readId = alignments.begin()->qName;
std::unordered_map<std::string, std::string> sequences = getSequencesMap(alignments, readIndex);
robin_hood::unordered_map<std::string, std::string> sequences = getSequencesMap(alignments, readIndex);
std::vector<std::pair<unsigned, unsigned>> pilesPos = getAlignmentWindowsPositions(alignments.begin()->qLength, alignments, minSupport, maxSupport, windowSize, windowOverlap);
if (pilesPos.size() == 0) {
return std::make_pair(readId, "");
}
unsigned i = 0;

// Compute consensuses for all the piles
std::pair<std::string, std::unordered_map<kmer, unsigned>> resCons;
std::pair<std::string, robin_hood::unordered_map<kmer, unsigned>> resCons;
std::vector<std::string> consensuses(pilesPos.size());
std::vector<std::unordered_map<kmer, unsigned>> merCounts(pilesPos.size());
std::vector<robin_hood::unordered_map<kmer, unsigned>> merCounts(pilesPos.size());
std::vector<std::string> curPile;
std::vector<std::string> templates(pilesPos.size());
for (i = 0; i < pilesPos.size(); i++) {
Expand Down
3 changes: 2 additions & 1 deletion src/CONSENT-correction.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
#include <map>
#include "utils.h"
#include "Overlap.h"
#include "robin_hood.h"

std::pair<std::string, std::string> processRead(int id, std::vector<Overlap>& alignments, unsigned minSupport, unsigned maxSupport, unsigned windowSize, unsigned merSize, unsigned commonKMers, unsigned minAnchors, unsigned solidThresh, unsigned windowOverlap, unsigned maxMSA, std::string path);

void runCorrection(std::string PAFIndex, std::string alignmentFile, unsigned minSupport, unsigned maxSupport, unsigned windowSize, unsigned merSize, unsigned commonKMers, unsigned minAnchors, unsigned solidThresh, unsigned windowOverlap, unsigned nbThreads, std::string readsFile, std::string proofFile, unsigned maxMSA, std::string path);
void runCorrection(std::string PAFIndex, std::string alignmentFile, unsigned minSupport, unsigned maxSupport, unsigned windowSize, unsigned merSize, unsigned commonKMers, unsigned minAnchors, unsigned solidThresh, unsigned windowOverlap, unsigned nbThreads, std::string readsFile, std::string proofFile, unsigned maxMSA, std::string path);
12 changes: 6 additions & 6 deletions src/CONSENT-polishing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,21 @@
#include "../CTPL/ctpl_stl.h"

std::mutex outMtx;
std::unordered_map<std::string, std::vector<bool>> readIndex;
robin_hood::unordered_map<std::string, std::vector<bool>> readIndex;
bool doTrimRead = false;

std::pair<std::string, std::string> processContig(std::vector<Overlap>& alignments, unsigned minSupport, unsigned maxSupport, unsigned windowSize, unsigned merSize, unsigned commonKMers, unsigned minAnchors,unsigned solidThresh, unsigned windowOverlap, unsigned maxMSA, std::string path, unsigned nbThreads) {
std::string readId = alignments.begin()->qName;
std::unordered_map<std::string, std::string> sequences = getSequencesMap(alignments, readIndex);
robin_hood::unordered_map<std::string, std::string> sequences = getSequencesMap(alignments, readIndex);
std::vector<std::pair<unsigned, unsigned>> pilesPos = getAlignmentWindowsPositions(alignments.begin()->qLength, alignments, minSupport, maxSupport, windowSize, windowOverlap);
if (pilesPos.size() == 0) {
return std::make_pair(readId, "");
}

// Compute consensuses for all the piles
std::pair<std::string, std::unordered_map<kmer, unsigned>> resCons;
std::pair<std::string, robin_hood::unordered_map<kmer, unsigned>> resCons;
std::vector<std::string> consensuses(pilesPos.size());
std::vector<std::unordered_map<kmer, unsigned>> merCounts(pilesPos.size());
std::vector<robin_hood::unordered_map<kmer, unsigned>> merCounts(pilesPos.size());
std::vector<std::string> curPile;
std::vector<std::string> templates(pilesPos.size());

Expand All @@ -42,7 +42,7 @@ std::pair<std::string, std::string> processContig(std::vector<Overlap>& alignmen
std::string curTpl;

// Load the first jobs
vector<std::future<std::pair<std::string, std::unordered_map<kmer, unsigned>>>> results(poolSize);
vector<std::future<std::pair<std::string, robin_hood::unordered_map<kmer, unsigned>>>> results(poolSize);
while (jobsLoaded < poolSize && jobsLoaded < jobsToProcess) {
curPile = getAlignmentWindowsSequences(alignments, minSupport, windowSize, windowOverlap, sequences, pilesPos[jobsLoaded].first, pilesPos[jobsLoaded].second, merSize, maxSupport, commonKMers);
templates[jobsLoaded] = curPile[0];
Expand All @@ -52,7 +52,7 @@ std::pair<std::string, std::string> processContig(std::vector<Overlap>& alignmen

// Load the remaining jobs as other jobs terminate
int curJob = 0;
std::pair<std::string, std::unordered_map<kmer, unsigned>> curRes;
std::pair<std::string, robin_hood::unordered_map<kmer, unsigned>> curRes;
while(jobsLoaded < jobsToProcess) {
// Get the job results
curRes = results[curJob].get();
Expand Down
3 changes: 2 additions & 1 deletion src/CONSENT-polishing.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
#include <map>
#include "utils.h"
#include "Overlap.h"
#include "robin_hood.h"

std::pair<std::string, std::string> processContig(std::vector<Overlap>& alignments, unsigned minSupport, unsigned maxSupport, unsigned windowSize, unsigned merSize, unsigned commonKMers, unsigned minAnchors, unsigned solidThresh, unsigned windowOverlap, unsigned maxMSA, std::string path, unsigned nbThreads);

void runCorrection(std::string PAFIndex, std::string alignmentFile, unsigned minSupport, unsigned maxSupport, unsigned windowSize, unsigned merSize, unsigned commonKMers, unsigned minAnchors, unsigned solidThresh, unsigned windowOverlap, unsigned nbThreads, std::string readsFile, std::string proofFile, unsigned maxMSA, std::string path);
void runCorrection(std::string PAFIndex, std::string alignmentFile, unsigned minSupport, unsigned maxSupport, unsigned windowSize, unsigned merSize, unsigned commonKMers, unsigned minAnchors, unsigned solidThresh, unsigned windowOverlap, unsigned nbThreads, std::string readsFile, std::string proofFile, unsigned maxMSA, std::string path);
8 changes: 4 additions & 4 deletions src/DBG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ std::string concatNucR(std::string f, int i) {
}
}

std::vector<std::string> getNeighbours(std::string kMer, unsigned merSize, int left, std::unordered_map<kmer, unsigned> merCounts, unsigned solidThresh) {
std::vector<std::string> getNeighbours(std::string kMer, unsigned merSize, int left, robin_hood::unordered_map<kmer, unsigned> merCounts, unsigned solidThresh) {
std::vector<std::string> neighbours;
std::string f, n, t = "";
kmer k;
Expand Down Expand Up @@ -53,7 +53,7 @@ std::vector<std::string> getNeighbours(std::string kMer, unsigned merSize, int l
return neighbours;
}

unsigned extendLeft(std::unordered_map<kmer, unsigned> merCounts, unsigned curK, unsigned extLen, string &LR, unsigned solidThresh) {
unsigned extendLeft(robin_hood::unordered_map<kmer, unsigned> merCounts, unsigned curK, unsigned extLen, string &LR, unsigned solidThresh) {
vector<string> neighbours;
vector<string>::iterator it;
unsigned dist = 0;
Expand All @@ -74,7 +74,7 @@ unsigned extendLeft(std::unordered_map<kmer, unsigned> merCounts, unsigned curK,
return dist;
}

unsigned extendRight(std::unordered_map<kmer, unsigned> merCounts, unsigned curK, unsigned extLen, string &LR, unsigned solidThresh) {
unsigned extendRight(robin_hood::unordered_map<kmer, unsigned> merCounts, unsigned curK, unsigned extLen, string &LR, unsigned solidThresh) {
vector<string> neighbours;
vector<string>::iterator it;
unsigned dist = 0;
Expand All @@ -96,7 +96,7 @@ unsigned extendLeft(std::unordered_map<kmer, unsigned> merCounts, unsigned curK,
}


int link(std::unordered_map<kmer, unsigned> merCounts, std::string srcSeed, std::string tgtSeed, unsigned curK, std::set<std::string> &visited, unsigned* curBranches, unsigned dist, std::string curExt, std::string &missingPart, unsigned merSize, unsigned LRLen, unsigned maxBranches, unsigned solidThresh, unsigned minOrder) {
int link(robin_hood::unordered_map<kmer, unsigned> merCounts, std::string srcSeed, std::string tgtSeed, unsigned curK, std::set<std::string> &visited, unsigned* curBranches, unsigned dist, std::string curExt, std::string &missingPart, unsigned merSize, unsigned LRLen, unsigned maxBranches, unsigned solidThresh, unsigned minOrder) {
if (curK < minOrder || *curBranches > maxBranches || dist > LRLen) {
missingPart = std::string();
return 0;
Expand Down
7 changes: 4 additions & 3 deletions src/DBG.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@
#include <unordered_map>
#include "../BMEAN/utils.h"
#include "reverseComplement.h"
#include "robin_hood.h"

using namespace std;

unsigned extendLeft(std::unordered_map<kmer, unsigned> merCounts, unsigned curK, unsigned extLen, string &LR, unsigned solidThresh);
unsigned extendLeft(robin_hood::unordered_map<kmer, unsigned> merCounts, unsigned curK, unsigned extLen, string &LR, unsigned solidThresh);

unsigned extendRight(std::unordered_map<kmer, unsigned> merCounts, unsigned curK, unsigned extLen, string &LR, unsigned solidThresh);
unsigned extendRight(robin_hood::unordered_map<kmer, unsigned> merCounts, unsigned curK, unsigned extLen, string &LR, unsigned solidThresh);

int link(std::unordered_map<kmer, unsigned> mapMerCounts, std::string srcSeed, std::string tgtSeed, unsigned curK, std::set<std::string> &visited, unsigned* curBranches, unsigned dist, std::string curExt, std::string &missingPart, unsigned merSize, unsigned LRLen, unsigned maxBranches, unsigned solidThresh, unsigned minOrder);
int link(robin_hood::unordered_map<kmer, unsigned> mapMerCounts, std::string srcSeed, std::string tgtSeed, unsigned curK, std::set<std::string> &visited, unsigned* curBranches, unsigned dist, std::string curExt, std::string &missingPart, unsigned merSize, unsigned LRLen, unsigned maxBranches, unsigned solidThresh, unsigned minOrder);
59 changes: 59 additions & 0 deletions src/OLDCONSENT.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#include <mutex>
#include <future>
#include <fstream>
#include <sstream>
#include <utility>
#include <vector>
#include <set>
#include <algorithm>
#include <string>
#include <utility>
#include <vector>
#include <algorithm>
#include <string>
#include <iostream>
#include <unistd.h>
#include <map>
#include "../BMEAN/utils.h"
#include "../CTPL/ctpl_stl.h"
#include "robin_hood.h"

// A sequence together with a [beg, end] pair of integer coordinates.
// Instances are ordered by `beg` first and by `end` second; `seq` does not
// take part in the ordering.
struct POASeq {
	std::string seq;
	// Default member initializers guarantee that a default-constructed POASeq
	// has well-defined coordinates (previously they were left uninitialized,
	// so comparing or reading such an object was undefined behaviour).
	int beg = 0;
	int end = 0;

	// Strict weak ordering: compares `beg`, then `end` on ties.
	bool operator<(const POASeq& s2) const {
		if (beg != s2.beg) {
			return beg < s2.beg;
		}
		return end < s2.end;
	}

	// Builds an empty sequence with both coordinates set to 0.
	POASeq() = default;

	// Builds a POASeq from sequence `s` and coordinates `b` (beg) and `e` (end).
	// `s` is taken by value and moved into place to avoid a second copy.
	POASeq(std::string s, int b, int e) : seq(std::move(s)), beg(b), end(e) {
	}
};

// Declaration only; the definition is not part of this header.
// Takes the corrected read by value, a k-mer -> count map by non-const
// reference, the k-mer size and a solidity threshold, and returns a string.
// NOTE(review): presumably returns the polished version of `correctedRead` — confirm against the definition.
std::string polishCorrection(std::string correctedRead, robin_hood::unordered_map<kmer, unsigned>& merCounts, unsigned merSize, int solidThresh);

// Disabled alternative declaration of polishCorrection, kept commented out (keyed on std::string k-mers).
// std::vector<std::pair<std::string, std::string>> polishCorrection(std::string correctedRead, std::vector<std::pair<std::pair<int, int>, int>>& corPosPiles, std::vector<std::vector<std::string>>& piles, robin_hood::unordered_map<std::string, unsigned>& pilesMers, unsigned merSize, int solidThresh, int minGap, int maxGap);

// Disabled alternative declaration of polishCorrection, kept commented out (no k-mer map parameter).
// std::vector<std::pair<std::string, std::string>> polishCorrection(std::string correctedRead, std::vector<std::pair<std::pair<int, int>, int>>& corPosPiles, std::vector<std::vector<std::string>>& piles, unsigned merSize, int solidThresh, int minGap, int maxGap);

// Declaration only. `sequences` and `merCounts` are passed by non-const
// reference, so the definition is able to modify both.
// NOTE(review): this map is keyed on std::string, whereas the other
// declarations in this header key their k-mer maps on `kmer` — confirm this is intended.
void removeBadSequences(std::vector<std::string>& sequences, std::string tplSeq, robin_hood::unordered_map<std::string, unsigned>& merCounts, unsigned merSize, unsigned commonKMers, unsigned solidThresh, unsigned windowSize);

// Declaration only. Takes the raw read and a sequence by value, plus
// per-window containers (consensuses, k-mer count maps, window positions,
// templates) by non-const reference, and returns a string.
// NOTE(review): presumably the four vectors are parallel, one entry per window — confirm against the definition.
std::string alignConsensuses(std::string rawRead, std::string sequence, std::vector<std::string>& consensuses, std::vector<robin_hood::unordered_map<kmer, unsigned>>& merCounts, std::vector<std::pair<unsigned, unsigned>>& pilesPos, std::vector<std::string>& templates, int startPos, unsigned windowSize, unsigned windowOverlap, unsigned solidThresh, unsigned merSize);

// Declaration only. Returns nothing; `reads` is passed by non-const reference.
// NOTE(review): how results are reported (in-place update of `reads` vs. output elsewhere) is not visible here — confirm.
void processReads(std::vector<std::vector<std::string>>& reads, unsigned minSupport, unsigned windowSize, unsigned merSize, unsigned commonKMers, unsigned solidThresh, unsigned windowOverlap, std::string path);

// Declaration only. Every parameter is passed by value and nothing is returned.
// NOTE(review): presumably the top-level entry point of the correction run — confirm against the definition.
void runCorrection(std::string PAFIndex, std::string alignmentFile, unsigned minSupport, unsigned maxSupport, unsigned windowSize, unsigned merSize, unsigned commonKMers, unsigned minAnchors, unsigned solidThresh, unsigned windowOverlap, unsigned nbThreads, std::string readsFile, std::string proofFile, unsigned maxMSA, std::string path);
3 changes: 2 additions & 1 deletion src/Overlap.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include <sstream>
#include <iostream>
#include "robin_hood.h"

struct Overlap {
std::string qName;
Expand Down Expand Up @@ -88,4 +89,4 @@ struct Overlap {

};

#endif
#endif
4 changes: 2 additions & 2 deletions src/alignmentPiles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
#include <iostream>
#include "alignmentPiles.h"

std::unordered_map<std::string, std::string> getSequencesMap(std::vector<Overlap>& alignments, std::unordered_map<std::string, std::vector<bool>>& readIndex) {
std::unordered_map<std::string, std::string> sequences;
robin_hood::unordered_map<std::string, std::string> getSequencesMap(std::vector<Overlap>& alignments, robin_hood::unordered_map<std::string, std::vector<bool>>& readIndex) {
robin_hood::unordered_map<std::string, std::string> sequences;
std::string header, seq;

// Insert template sequence
Expand Down
3 changes: 2 additions & 1 deletion src/alignmentPiles.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
#include <unordered_map>
#include "Overlap.h"
#include "utils.h"
#include "robin_hood.h"

std::vector<Overlap> getNextReadPile(std::ifstream& f, unsigned maxSupport);

std::unordered_map<std::string, std::string> getSequencesMap(std::vector<Overlap>& alignments, std::unordered_map<std::string, std::vector<bool>>& readIndex);
robin_hood::unordered_map<std::string, std::string> getSequencesMap(std::vector<Overlap>& alignments, robin_hood::unordered_map<std::string, std::vector<bool>>& readIndex);
4 changes: 2 additions & 2 deletions src/alignmentWindows.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ std::vector<std::pair<unsigned, unsigned>> getAlignmentWindowsPositions(unsigned
return pilesPos;
}

std::vector<std::string> getAlignmentWindowsSequences(std::vector<Overlap>& alignments, unsigned minSupport, unsigned windowSize, unsigned windowOverlap, std::unordered_map<std::string, std::string>& sequences, unsigned qBeg, unsigned end, unsigned merSize, unsigned maxSupport, unsigned commonKMers) {
std::vector<std::string> getAlignmentWindowsSequences(std::vector<Overlap>& alignments, unsigned minSupport, unsigned windowSize, unsigned windowOverlap, robin_hood::unordered_map<std::string, std::string>& sequences, unsigned qBeg, unsigned end, unsigned merSize, unsigned maxSupport, unsigned commonKMers) {
std::vector<std::string> curPile;
std::vector<unsigned> curScore;
unsigned length, shift;
Expand Down Expand Up @@ -146,7 +146,7 @@ std::vector<std::string> getAlignmentWindowsSequences(std::vector<Overlap>& alig
return curPile;
}

std::pair<std::vector<std::pair<unsigned, unsigned>>, std::vector<std::vector<std::string>>> getAlignmentWindows(std::vector<Overlap>& alignments, unsigned minSupport, unsigned maxSupport, unsigned windowSize, unsigned windowOverlap, std::unordered_map<std::string, std::string> sequences, unsigned merSize, unsigned commonKMers) {
std::pair<std::vector<std::pair<unsigned, unsigned>>, std::vector<std::vector<std::string>>> getAlignmentWindows(std::vector<Overlap>& alignments, unsigned minSupport, unsigned maxSupport, unsigned windowSize, unsigned windowOverlap, robin_hood::unordered_map<std::string, std::string> sequences, unsigned merSize, unsigned commonKMers) {
unsigned tplLen = alignments.begin()->qLength;

std::vector<std::pair<unsigned, unsigned>> windowsPos = getAlignmentWindowsPositions(tplLen, alignments, minSupport, maxSupport, windowSize, windowOverlap);
Expand Down
5 changes: 3 additions & 2 deletions src/alignmentWindows.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
#include <unordered_map>
#include "Overlap.h"
#include "reverseComplement.h"
#include "robin_hood.h"

unsigned* getCoverages(std::vector<Overlap>& alignments);

std::vector<std::pair<unsigned, unsigned>> getAlignmentWindowsPositions(unsigned tplLen, std::vector<Overlap>& alignments, unsigned minSupport, unsigned maxSupport, unsigned windowSize, int overlappingWindows);

std::vector<std::string> getAlignmentWindowsSequences(std::vector<Overlap>& alignments, unsigned minSupport, unsigned windowSize, unsigned windowOverlap, std::unordered_map<std::string, std::string>& sequences, unsigned beg, unsigned end, unsigned merSize, unsigned maxSupport, unsigned commonKMers);
std::vector<std::string> getAlignmentWindowsSequences(std::vector<Overlap>& alignments, unsigned minSupport, unsigned windowSize, unsigned windowOverlap, robin_hood::unordered_map<std::string, std::string>& sequences, unsigned beg, unsigned end, unsigned merSize, unsigned maxSupport, unsigned commonKMers);

std::pair<std::vector<std::pair<unsigned, unsigned>>, std::vector<std::vector<std::string>>> getAlignmentWindows(std::vector<Overlap>& alignments, unsigned minSupport, unsigned maxSupport, unsigned windowSize, unsigned windowOverlap, std::unordered_map<std::string, std::string> sequences, unsigned merSize, unsigned commonKMers);
std::pair<std::vector<std::pair<unsigned, unsigned>>, std::vector<std::vector<std::string>>> getAlignmentWindows(std::vector<Overlap>& alignments, unsigned minSupport, unsigned maxSupport, unsigned windowSize, unsigned windowOverlap, robin_hood::unordered_map<std::string, std::string> sequences, unsigned merSize, unsigned commonKMers);
8 changes: 4 additions & 4 deletions src/correctionAlignment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include "utils.h"
#include "../BMEAN/Complete-Striped-Smith-Waterman-Library/src/ssw_cpp.h"

int nbSolidMers(std::string seq, std::unordered_map<kmer, unsigned> merCounts, unsigned merSize, unsigned solidThresh) {
int nbSolidMers(std::string seq, robin_hood::unordered_map<kmer, unsigned> merCounts, unsigned merSize, unsigned solidThresh) {
int nb = 0;
for (unsigned i = 0; i < seq.length() - merSize + 1; i++) {
if (merCounts[str2num(seq.substr(i, merSize))] >= solidThresh) {
Expand Down Expand Up @@ -44,7 +44,7 @@ std::pair<int, int> getIndels(std::string cigar){
return std::make_pair(ins, del);
}

std::string alignConsensus(std::string rawRead, std::string sequence, std::vector<std::string>& consensuses, std::vector<std::unordered_map<kmer, unsigned>>& merCounts, std::vector<std::pair<unsigned, unsigned>>& pilesPos, std::vector<std::string>& templates, int startPos, unsigned windowSize, unsigned windowOverlap, unsigned solidThresh, unsigned merSize) {
std::string alignConsensus(std::string rawRead, std::string sequence, std::vector<std::string>& consensuses, std::vector<robin_hood::unordered_map<kmer, unsigned>>& merCounts, std::vector<std::pair<unsigned, unsigned>>& pilesPos, std::vector<std::string>& templates, int startPos, unsigned windowSize, unsigned windowOverlap, unsigned solidThresh, unsigned merSize) {
StripedSmithWaterman::Aligner aligner;
StripedSmithWaterman::Filter filter;
StripedSmithWaterman::Alignment alignment;
Expand All @@ -63,8 +63,8 @@ std::string alignConsensus(std::string rawRead, std::string sequence, std::vecto
int alPos;
int sizeAl;
std::string curCons, oldCons;
std::unordered_map<kmer, unsigned> oldMers;
std::unordered_map<kmer, unsigned> curMers;
robin_hood::unordered_map<kmer, unsigned> oldMers;
robin_hood::unordered_map<kmer, unsigned> curMers;
unsigned overlap;
std::string seq1, seq2;
int solidMersSeq1, solidMersSeq2;
Expand Down
Loading

0 comments on commit b6f8148

Please sign in to comment.