From 819edbc0071962cf052b3a497d74f32910aa774b Mon Sep 17 00:00:00 2001 From: Sebastian Uhrig Date: Sat, 7 Dec 2019 20:33:57 +0100 Subject: [PATCH 1/2] improve mapping speed by avoiding calls to memset for winBin --- source/FastResetVector.h | 40 +++++++++++++++++++ source/ReadAlign.cpp | 6 +-- source/ReadAlign.h | 3 +- ...ReadAlign_createExtendWindowsWithAlign.cpp | 2 +- source/ReadAlign_stitchPieces.cpp | 5 +-- 5 files changed, 46 insertions(+), 10 deletions(-) create mode 100644 source/FastResetVector.h diff --git a/source/FastResetVector.h b/source/FastResetVector.h new file mode 100644 index 00000000..581b1adf --- /dev/null +++ b/source/FastResetVector.h @@ -0,0 +1,40 @@ +#ifndef H_FastResetVector +#define H_FastResetVector + +#include <vector> + +// Implementation of a vector that can be reset to a default value in ~O(1). +// It is more efficient than an ordinary vector if: +// - the values are sparse, such that only a few elements need to be deleted. +// - there are not too many accesses to elements, since upon each access +// it must be checked if the element is stale. + +template <class T> class FastResetVector { + + private: + vector<T> data; // contains actual data to be stored + T defaultValue; // all elements of `data` are initialized with this value + unsigned int incarnation = 0; // increasing the incarnation invalidates all elements of `data` (=reset) + vector<unsigned int> lastUpdate; // for each element in `data`, keep track of the incarnation in which it was last updated + + public: + FastResetVector(const size_t s, const T& d): data(s,d), defaultValue(d), lastUpdate(s) {} + + inline T& operator[](const size_t i) { // whenever an element is accessed, check if it's stale + if (incarnation != lastUpdate[i]) { // is it stale?
+ data[i] = defaultValue; // reset to default value + lastUpdate[i] = incarnation; // mark as fresh for this incarnation + }; + return data[i]; + } + + void reset() { + incarnation++; // we can invalidate `data` simply through "reincarnation" + if (incarnation == 0) // only when there is an integer overflow, we need to reinitialize + fill(data.begin(), data.end(), defaultValue); + } + +}; + +#endif + diff --git a/source/ReadAlign.cpp b/source/ReadAlign.cpp index 3c15ef0c..1f5a9e82 100644 --- a/source/ReadAlign.cpp +++ b/source/ReadAlign.cpp @@ -7,11 +7,7 @@ ReadAlign::ReadAlign (Parameters& Pin, Genome &genomeIn, Transcriptome *TrIn, in : mapGen(genomeIn), P(Pin), chunkTr(TrIn) { readNmates=P.readNmates; - winBin = new uintWinBin* [2]; - winBin[0] = new uintWinBin [P.winBinN]; - winBin[1] = new uintWinBin [P.winBinN]; - memset(winBin[0],255,sizeof(winBin[0][0])*P.winBinN); - memset(winBin[1],255,sizeof(winBin[0][0])*P.winBinN); + winBin.resize(2, FastResetVector<uintWinBin>(P.winBinN, uintWinBinMax)); //RNGs rngMultOrder.seed(P.runRNGseed*(iChunk+1)); rngUniformReal0to1=std::uniform_real_distribution (0.0, 1.0); diff --git a/source/ReadAlign.h b/source/ReadAlign.h index ad8f1f6b..23c4d50b 100644 --- a/source/ReadAlign.h +++ b/source/ReadAlign.h @@ -13,6 +13,7 @@ #include "ChimericDetection.h" #include "SoloRead.h" #include "ReadAnnotations.h" +#include "FastResetVector.h" #include #include @@ -111,7 +112,7 @@ class ReadAlign { // uint fragLength[MAX_N_FRAG], fragStart[MAX_N_FRAG]; //fragment Lengths and Starts in read space //binned alignments - uintWinBin **winBin; //binned genome: window ID (number) per bin + vector< FastResetVector<uintWinBin> > winBin; //binned genome: window ID (number) per bin //alignments uiPC *PC; //pieces coordinates diff --git a/source/ReadAlign_createExtendWindowsWithAlign.cpp b/source/ReadAlign_createExtendWindowsWithAlign.cpp index c04fd21b..edd67bda 100644 --- a/source/ReadAlign_createExtendWindowsWithAlign.cpp +++
b/source/ReadAlign_createExtendWindowsWithAlign.cpp @@ -9,7 +9,7 @@ int ReadAlign::createExtendWindowsWithAlign(uint a1, uint aStr) { uint aBin = (a1 >> P.winBinNbits); //align's bin uint iBinLeft=aBin, iBinRight=aBin; - uintWinBin* wB=winBin[aStr]; + FastResetVector<uintWinBin>& wB=winBin[aStr]; uint iBin=-1, iWin=-1, iWinRight=-1; diff --git a/source/ReadAlign_stitchPieces.cpp b/source/ReadAlign_stitchPieces.cpp index db5c3e74..89c9f6e5 100644 --- a/source/ReadAlign_stitchPieces.cpp +++ b/source/ReadAlign_stitchPieces.cpp @@ -13,9 +13,8 @@ void ReadAlign::stitchPieces(char **R, uint Lread) { //zero-out winBin - memset(winBin[0],255,sizeof(winBin[0][0])*P.winBinN); - memset(winBin[1],255,sizeof(winBin[0][0])*P.winBinN); - + winBin[0].reset(); + winBin[1].reset(); // for (uint iWin=0;iWin Date: Sun, 8 Dec 2019 17:03:53 +0100 Subject: [PATCH 2/2] minor speed improvement: unpack PackedArray using bit mask instead of bit shifting --- source/PackedArray.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/PackedArray.h b/source/PackedArray.h index efaad5b8..48ea7b2e 100644 --- a/source/PackedArray.h +++ b/source/PackedArray.h @@ -27,7 +27,8 @@ inline uint PackedArray::operator [] (uint ii) { uint S=b%8; uint a1 = *((uint*) (charArray+B)); - a1 = ((a1>>S)<<wordCompLength)>>wordCompLength; + a1 >>= S; + a1 &= bitRecMask; return a1; };