From 9b9383a3f0c7a99fef37640823174368f74c1487 Mon Sep 17 00:00:00 2001 From: Cameron Gilchrist Date: Fri, 10 Nov 2023 16:24:56 +0900 Subject: [PATCH] Add scoreBias parameter to computePSSMFromMSA (#775) --- src/alignment/PSSMCalculator.cpp | 13 +++++++------ src/alignment/PSSMCalculator.h | 4 ++-- src/test/TestMultipleAlignment.cpp | 2 +- src/test/TestPSSM.cpp | 2 +- src/test/TestPSSMPrune.cpp | 2 +- src/test/TestProfileAlignment.cpp | 2 +- src/util/expandaln.cpp | 2 +- src/util/msa2profile.cpp | 2 +- src/util/msa2result.cpp | 2 +- src/util/result2msa.cpp | 2 +- src/util/result2profile.cpp | 2 +- 11 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/alignment/PSSMCalculator.cpp b/src/alignment/PSSMCalculator.cpp index ab550b57e..79bafaf8f 100644 --- a/src/alignment/PSSMCalculator.cpp +++ b/src/alignment/PSSMCalculator.cpp @@ -92,7 +92,7 @@ PSSMCalculator::~PSSMCalculator() { // PSSMCalculator::Profile PSSMCalculator::computePSSMFromMSA(size_t setSize, // size_t queryLength, // const char **msaSeqs, -// bool wg) { +// bool wg, 0.0) { // increaseSetSize(setSize); // // Quick and dirty calculation of the weight per sequence wg[k] // computeSequenceWeights(seqWeight, queryLength, setSize, msaSeqs); @@ -135,10 +135,10 @@ PSSMCalculator::~PSSMCalculator() { //// PSSMCalculator::printPSSM(queryLength); // } // this overload is only used in TestProfileAlignment.cpp and TestPSSM.cpp -PSSMCalculator::Profile PSSMCalculator::computePSSMFromMSA(size_t setSize, size_t queryLength, const char **msaSeqs, bool wg) { +PSSMCalculator::Profile PSSMCalculator::computePSSMFromMSA(size_t setSize, size_t queryLength, const char **msaSeqs, bool wg, float scoreBias) { #ifdef GAP_POS_SCORING std::vector dummy; - return computePSSMFromMSA(setSize, queryLength, msaSeqs, dummy, wg); + return computePSSMFromMSA(setSize, queryLength, msaSeqs, dummy, wg, scoreBias); } #endif @@ -146,7 +146,8 @@ PSSMCalculator::Profile PSSMCalculator::computePSSMFromMSA(size_t setSize, size_ PSSMCalculator::Profile PSSMCalculator::computePSSMFromMSA( size_t setSize, size_t queryLength, const char **msaSeqs, const std::vector &alnResults, - bool wg + bool wg, + float scoreBias ) { #endif increaseSetSize(setSize); @@ -193,7 +194,7 @@ PSSMCalculator::Profile PSSMCalculator::computePSSMFromMSA( // PSSMCalculator::printPSSM(queryLength); // create final Matrix - computeLogPSSM(subMat, pssm, profile, 8.0, queryLength, 0.0); + computeLogPSSM(subMat, pssm, profile, 8.0, queryLength, scoreBias); // PSSMCalculator::printProfile(queryLength); // PSSMCalculator::printPSSM(queryLength); #ifdef GAP_POS_SCORING @@ -244,7 +245,7 @@ void PSSMCalculator::computeLogPSSM(BaseMatrix *subMat, char *pssm, const float const float aaProb = profile[pos * Sequence::PROFILE_AA_SIZE + aa]; const unsigned int idx = pos * Sequence::PROFILE_AA_SIZE + aa; float logProb = MathUtil::flog2(aaProb / subMat->pBack[aa]); - float pssmVal = bitFactor * logProb + scoreBias; + float pssmVal = bitFactor * logProb + bitFactor * scoreBias; pssmVal = static_cast((pssmVal < 0.0) ? pssmVal - 0.5 : pssmVal + 0.5); float truncPssmVal = std::min(pssmVal, 127.0f); truncPssmVal = std::max(-128.0f, truncPssmVal); diff --git a/src/alignment/PSSMCalculator.h b/src/alignment/PSSMCalculator.h index 579433cd4..146475b2d 100644 --- a/src/alignment/PSSMCalculator.h +++ b/src/alignment/PSSMCalculator.h @@ -47,9 +47,9 @@ class PSSMCalculator { ~PSSMCalculator(); - Profile computePSSMFromMSA(size_t setSize, size_t queryLength, const char **msaSeqs, bool wg); + Profile computePSSMFromMSA(size_t setSize, size_t queryLength, const char **msaSeqs, bool wg, float scoreBias); #ifdef GAP_POS_SCORING - Profile computePSSMFromMSA(size_t setSize, size_t queryLength, const char **msaSeqs, const std::vector &alnResults, bool wg); + Profile computePSSMFromMSA(size_t setSize, size_t queryLength, const char **msaSeqs, const std::vector &alnResults, bool wg, float scoreBias); #endif void printProfile(size_t queryLength); diff --git a/src/test/TestMultipleAlignment.cpp b/src/test/TestMultipleAlignment.cpp index 495739708..38c7aec16 100644 --- a/src/test/TestMultipleAlignment.cpp +++ b/src/test/TestMultipleAlignment.cpp @@ -88,7 +88,7 @@ int main(int, const char**) { #ifdef GAP_POS_SCORING , alnResults #endif - , false + , false, 0.0 ); pssm.printProfile(res.centerLength); pssm.printPSSM(res.centerLength); diff --git a/src/test/TestPSSM.cpp b/src/test/TestPSSM.cpp index 7feb3ef71..4e38f9dca 100644 --- a/src/test/TestPSSM.cpp +++ b/src/test/TestPSSM.cpp @@ -1611,7 +1611,7 @@ int main (int, const char**) { , par.gapPseudoCount #endif ); - pssm.computePSSMFromMSA(filteredSetSize, res.centerLength, (const char**) res.msaSequence, false); + pssm.computePSSMFromMSA(filteredSetSize, res.centerLength, (const char**) res.msaSequence, false, 0.0); //pssm.printProfile(res.centerLength); pssm.printPSSM(res.centerLength); MultipleAlignment::deleteMSA(&res); diff --git a/src/test/TestPSSMPrune.cpp b/src/test/TestPSSMPrune.cpp index e3428f55c..cb36036f6 100644 --- a/src/test/TestPSSMPrune.cpp +++ b/src/test/TestPSSMPrune.cpp @@ -90,7 +90,7 @@ int main (int, const char**) { //seqSet.push_back(s5); // PSSMCalculator pssm(&subMat, counter, 1.0, 1.5); -// pssm.computePSSMFromMSA(filterResult.setSize, res.centerLength, filterResult.filteredMsaSequence, false); +// pssm.computePSSMFromMSA(filterResult.setSize, res.centerLength, filterResult.filteredMsaSequence, false, 0.0); // //pssm.printProfile(res.centerLength); // pssm.printPSSM(res.centerLength); MultipleAlignment::deleteMSA(&res); diff --git a/src/test/TestProfileAlignment.cpp b/src/test/TestProfileAlignment.cpp index ad6338379..6e54ad657 100644 --- a/src/test/TestProfileAlignment.cpp +++ b/src/test/TestProfileAlignment.cpp @@ -774,7 +774,7 @@ int main (int, const char**) { 21 : subMat.aa2num[(int) msaSeq[k][pos]]; } } - PSSMCalculator::Profile pssmRet = pssmCalculator.computePSSMFromMSA(setSize,centerSeqSize, (const char **) msaSequence, false); + PSSMCalculator::Profile pssmRet = pssmCalculator.computePSSMFromMSA(setSize,centerSeqSize, (const char **) msaSequence, false, 0.0); const char * sequence = pssmRet.pssm; char * data = new char[centerSeqSize*20+1]; for (size_t i = 0; i < centerSeqSize*20; i++) { diff --git a/src/util/expandaln.cpp b/src/util/expandaln.cpp index b43630f19..ebda49dec 100644 --- a/src/util/expandaln.cpp +++ b/src/util/expandaln.cpp @@ -393,7 +393,7 @@ int expandaln(int argc, const char **argv, const Command& command, bool returnAl (int)(par.filterMaxSeqId * 100), par.Ndiff, par.filterMinEnable, (const char **) res.msaSequence, true) : res.setSize; - PSSMCalculator::Profile pssmRes = calculator->computePSSMFromMSA(filteredSetSize, aSeq.L, (const char **) res.msaSequence, par.wg); + PSSMCalculator::Profile pssmRes = calculator->computePSSMFromMSA(filteredSetSize, aSeq.L, (const char **) res.msaSequence, par.wg, 0.0); if (par.maskProfile == true) { masker->mask(aSeq, par.maskProb, pssmRes); } diff --git a/src/util/msa2profile.cpp b/src/util/msa2profile.cpp index e6fb0a2f4..0189cfd45 100644 --- a/src/util/msa2profile.cpp +++ b/src/util/msa2profile.cpp @@ -413,7 +413,7 @@ int msa2profile(int argc, const char **argv, const Command &command) { #ifdef GAP_POS_SCORING alnResults, #endif - par.wg); + par.wg, 0.0); if (par.compBiasCorrection == true) { SubstitutionMatrix::calcGlobalAaBiasCorrection(&subMat, pssmRes.pssm, pNullBuffer, Sequence::PROFILE_AA_SIZE, diff --git a/src/util/msa2result.cpp b/src/util/msa2result.cpp index 5ed652185..4dbc93c26 100644 --- a/src/util/msa2result.cpp +++ b/src/util/msa2result.cpp @@ -384,7 +384,7 @@ int msa2result(int argc, const char **argv, const Command &command) { static_cast(par.filterMaxSeqId * 100), par.Ndiff, par.filterMinEnable, (const char **) msaSequences, true); } - PSSMCalculator::Profile pssmRes = calculator.computePSSMFromMSA(filteredSetSize, centerLength, (const char **) msaSequences, par.wg); + PSSMCalculator::Profile pssmRes = calculator.computePSSMFromMSA(filteredSetSize, centerLength, (const char **) msaSequences, par.wg, 0.0); resultWriter.writeStart(thread_idx); for (size_t i = 0; i < setSize; ++i) { diff --git a/src/util/result2msa.cpp b/src/util/result2msa.cpp index 47689b49d..57cdc33b5 100644 --- a/src/util/result2msa.cpp +++ b/src/util/result2msa.cpp @@ -503,7 +503,7 @@ int result2msa(int argc, const char **argv, const Command &command) { #ifdef GAP_POS_SCORING alnResults, #endif - par.wg); + par.wg, 0.0); result.append(">consensus_"); result.append(centerSequenceHeader, centerHeaderLength); for (int pos = 0; pos < centerSequence.L; pos++) { diff --git a/src/util/result2profile.cpp b/src/util/result2profile.cpp index a00dabccf..34759b171 100644 --- a/src/util/result2profile.cpp +++ b/src/util/result2profile.cpp @@ -260,7 +260,7 @@ int result2profile(int argc, const char **argv, const Command &command, bool ret #ifdef GAP_POS_SCORING alnResults, #endif - par.wg); + par.wg, 0.0); if (par.compBiasCorrection == true){ SubstitutionMatrix::calcGlobalAaBiasCorrection(&subMat, pssmRes.pssm, pNullBuffer, Sequence::PROFILE_AA_SIZE,