Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/iqtree/iqtree2 into seq-e…
Browse files Browse the repository at this point in the history
…rror-model
  • Loading branch information
bqminh committed Jul 8, 2020
2 parents 46bedd7 + 1036c2e commit 85259eb
Show file tree
Hide file tree
Showing 43 changed files with 659 additions and 658 deletions.
438 changes: 225 additions & 213 deletions alignment/alignment.cpp

Large diffs are not rendered by default.

16 changes: 12 additions & 4 deletions alignment/alignment.h
Original file line number Diff line number Diff line change
Expand Up @@ -335,21 +335,21 @@ class Alignment : public vector<Pattern>, public CharSet, public StateSpace {
/**
@return number of sequences
*/
inline int getNSeq() const {
inline size_t getNSeq() const {
return seq_names.size();
}

/**
@return number of sites (alignment columns)
*/
inline int getNSite() {
inline size_t getNSite() {
return site_pattern.size();
}

/**
@return number of patterns
*/
inline int getNPattern() {
inline size_t getNPattern() {
return size();
}

Expand Down Expand Up @@ -429,6 +429,14 @@ class Alignment : public vector<Pattern>, public CharSet, public StateSpace {
*/
virtual Alignment *removeIdenticalSeq(string not_remove, bool keep_two, StrVector &removed_seqs, StrVector &target_seqs);

/**
* calculating hashes for sequences
* @param v state at a given site, in the sequence being hashed
* @param hash running hash value for the sequence (modified)
*/
void adjustHash(StateType v, size_t& hash) const;
void adjustHash(bool v, size_t& hash) const;

/**
Quit if some sequences contain only gaps or missing data
*/
Expand All @@ -443,7 +451,7 @@ class Alignment : public vector<Pattern>, public CharSet, public StateSpace {
@return TRUE if seq_id contains only gaps or missing characters
@param seq_id sequence ID
*/
bool isGapOnlySeq(int seq_id);
bool isGapOnlySeq(size_t seq_id);

virtual bool isSuperAlignment() {
return false;
Expand Down
2 changes: 1 addition & 1 deletion alignment/alignmentpairwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ double AlignmentPairwise::optimizeDist(double initial_dist) {
return optimizeDist(initial_dist, d2l);
}

double AlignmentPairwise::computeDist
double AlignmentPairwise::recomputeDist
( int seq1, int seq2, double initial_dist, double &d2l ) {
//Only called when -experimental has been passed
if (initial_dist == 0.0) {
Expand Down
2 changes: 1 addition & 1 deletion alignment/alignmentpairwise.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ class AlignmentPairwise : public Alignment, public Optimization
@return a new estimate of branch length
*/

virtual double computeDist( int seq1, int seq2, double initial_dist, double &d2l );
virtual double recomputeDist( int seq1, int seq2, double initial_dist, double &d2l );

/**
destructor
Expand Down
53 changes: 27 additions & 26 deletions alignment/alignmentsummary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

#include "alignment.h"
#include "alignmentsummary.h"
#include <boost/scoped_array.hpp>

AlignmentSummary::AlignmentSummary(const Alignment* a, bool keepConstSites) {
alignment = a;
Expand All @@ -31,18 +30,20 @@ AlignmentSummary::AlignmentSummary(const Alignment* a, bool keepConstSites) {
SiteSummary(): isConst(false), frequency(0), minState(0), maxState(0) {}
};

boost::scoped_array<SiteSummary> siteArray( new SiteSummary[alignment->size()] );
size_t siteCount = alignment->size();
std::vector<SiteSummary> sites;
sites.reserve(siteCount);
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int site=0; site<alignment->size(); ++site) { //per site
auto itSite = alignment->begin() + site;
SiteSummary &s = *(siteArray.get() + site);
for (size_t site=0; site<siteCount; ++site) { //per site
auto itSite = alignment->begin() + site;
SiteSummary &s = sites[site];
StateType minStateForSite = (*itSite)[0];
StateType maxStateForSite = minStateForSite;
s.isConst = itSite->isConst();
s.frequency = itSite->frequency;
for (int seq=1; seq<sequenceCount; ++seq) {
for (size_t seq=1; seq<sequenceCount; ++seq) {
auto state = (*itSite)[seq] ;
if (state < minStateForSite ) {
minStateForSite = state;
Expand All @@ -55,37 +56,37 @@ AlignmentSummary::AlignmentSummary(const Alignment* a, bool keepConstSites) {
s.maxState = maxStateForSite;
}
sequenceLength = 0; //Number sites where there's some variability
SiteSummary* s = siteArray.get();
std::map<int, int> map = stateToSumOfConstantSiteFrequencies;
for (int i=0; i<alignment->size(); ++i, ++s) {
for (size_t site=0; site<siteCount; ++site) {
SiteSummary &s = sites[site];
bool alreadyCounted = false;
totalFrequency += s->frequency;
totalFrequencyOfNonConstSites += s->isConst ? s->frequency : 0;
if ( keepConstSites || !s->isConst) {
if ( 0 < s->frequency && s->minState < s->maxState) {
totalFrequency += s.frequency;
totalFrequencyOfNonConstSites += s.isConst ? 0 : s.frequency;
if ( keepConstSites || !s.isConst) {
if ( 0 < s.frequency && s.minState < s.maxState) {
alreadyCounted = true;
if (sequenceLength==0) {
minState = s->minState;
maxState = s->maxState;
minState = s.minState;
maxState = s.maxState;
} else {
if (s->minState < minState) {
minState = s->minState;
if (s.minState < minState) {
minState = s.minState;
}
if (maxState < s->maxState) {
maxState = s->maxState;
if (maxState < s.maxState) {
maxState = s.maxState;
}
}
siteNumbers.emplace_back(i);
siteFrequencies.emplace_back(s->frequency);
nonConstSiteFrequencies.emplace_back(s->isConst ? 0 : s->frequency);
siteNumbers.emplace_back(site);
siteFrequencies.emplace_back(s.frequency);
nonConstSiteFrequencies.emplace_back(s.isConst ? 0 : s.frequency);
++sequenceLength;
}
}
if (s->minState == s->maxState && !alreadyCounted) {
if (map.find(s->minState)==map.end()) {
map[s->minState] = s->frequency;
if (s.minState == s.maxState && !alreadyCounted) {
if (map.find(s.minState)==map.end()) {
map[s.minState] = s.frequency;
} else {
map[s->minState] += s->frequency;
map[s.minState] += s.frequency;
}
}
}
Expand All @@ -98,7 +99,7 @@ AlignmentSummary::~AlignmentSummary() {
sequenceCount = 0;
}

int AlignmentSummary::getSumOfConstantSiteFrequenciesForState(int state) {
size_t AlignmentSummary::getSumOfConstantSiteFrequenciesForState(int state) {
auto it = stateToSumOfConstantSiteFrequencies.find(state);
return (it == stateToSumOfConstantSiteFrequencies.end()) ? 0 : it->second;
}
Expand Down
6 changes: 3 additions & 3 deletions alignment/alignmentsummary.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,14 @@ struct AlignmentSummary
std::vector<int> nonConstSiteFrequencies; //ditto, but zeroed if site
//isConst according to alignment
std::map<int, int> stateToSumOfConstantSiteFrequencies;
int totalFrequency; //sum of frequencies (*including* constant sites!)
int totalFrequencyOfNonConstSites; //ditto (*excluding* constant sites!)
size_t totalFrequency; //sum of frequencies (*including* constant sites!)
size_t totalFrequencyOfNonConstSites; //ditto (*excluding* constant sites!)
StateType minState; //found on any site where there is variation
StateType maxState; //ditto
char* sequenceMatrix;
size_t sequenceLength; //Sequence length
size_t sequenceCount; //The number of sequences
int getSumOfConstantSiteFrequenciesForState(int state);
size_t getSumOfConstantSiteFrequenciesForState(int state);
bool constructSequenceMatrix(bool treatAllAmbiguousStatesAsUnknown);
};

Expand Down
13 changes: 6 additions & 7 deletions alignment/maalignment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,8 @@ IntVector MaAlignment::computeExpectedNorFre()
if ( logLL.empty())
outError("Error: log likelihood of patterns are not given!");

int patNum = getNPattern();
int alignLen = getNSite();
size_t patNum = getNPattern();
size_t alignLen = getNSite();
//resize the expectedNorFre vector
expectedNorFre.resize(patNum,-1);

Expand Down Expand Up @@ -144,13 +144,12 @@ void MaAlignment::printPatObsExpFre(const char *fileName, const IntVector expect
out.open(fileName);
out << "Pattern\tLogLL\tObservedFre\tExpectedFre" << endl;

int patNum = getNPattern();
int seqNum = getNSeq();
int seqID;
size_t patNum = getNPattern();
size_t seqNum = getNSeq();

for ( int i = 0; i < patNum; i++ )
for ( size_t i = 0; i < patNum; ++i )
{
for ( seqID = 0; seqID < seqNum; seqID++ ){
for ( size_t seqID = 0; seqID < seqNum; ++seqID ){
out << convertStateBackStr(at(i)[seqID]);
}
out << "\t" << logLL[i] << "\t" << (*this)[i].frequency << "\t" << expectedNorFre[i] << endl;
Expand Down
4 changes: 2 additions & 2 deletions alignment/pattern.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Pattern::Pattern()
// is_const = false;
// is_informative = false;
flag = 0;
const_char = 255;
const_char = -1;
num_chars = 0;
}

Expand All @@ -30,7 +30,7 @@ Pattern::Pattern(int nseq, int freq)
// is_const = false;
// is_informative = false;
flag = 0;
const_char = 255;
const_char = -1;
num_chars = 0;
}

Expand Down
Loading

0 comments on commit 85259eb

Please sign in to comment.