diff --git a/alignment/alignment.cpp b/alignment/alignment.cpp index 1f48c933c..c8e1bbd0a 100644 --- a/alignment/alignment.cpp +++ b/alignment/alignment.cpp @@ -22,7 +22,6 @@ #ifdef USE_BOOST #include #include -#include #endif @@ -85,14 +84,14 @@ vector& Alignment::getSeqNames() { } int Alignment::getSeqID(string &seq_name) { - for (int i = 0; i < getNSeq(); i++) + for (size_t i = 0; i < getNSeq(); i++) if (seq_name == getSeqName(i)) return i; return -1; } int Alignment::getMaxSeqNameLength() { int len = 0; - for (int i = 0; i < getNSeq(); i++) + for (size_t i = 0; i < getNSeq(); i++) if (getSeqName(i).length() > len) len = getSeqName(i).length(); return len; @@ -173,11 +172,9 @@ void Alignment::checkSeqName() { } double *state_freq = new double[num_states]; -// double *freq_per_sequence = new double[num_states*getNSeq()]; double *freq_per_sequence = new double[num_states]; unsigned *count_per_seq = new unsigned[num_states*getNSeq()]; computeStateFreq(state_freq); -// computeStateFreqPerSequence(freq_per_sequence); countStatePerSequence(count_per_seq); int i, df = -1; @@ -287,13 +284,14 @@ void Alignment::checkSeqName() { int Alignment::checkIdenticalSeq() { + //Todo: This should use sequence hashing. int num_identical = 0; IntVector checked; checked.resize(getNSeq(), 0); - for (int seq1 = 0; seq1 < getNSeq(); seq1++) { + for (size_t seq1 = 0; seq1 < getNSeq(); ++seq1) { if (checked[seq1]) continue; bool first = true; - for (int seq2 = seq1+1; seq2 < getNSeq(); seq2++) { + for (size_t seq2 = seq1+1; seq2 < getNSeq(); ++seq2) { bool equal_seq = true; for (iterator it = begin(); it != end(); it++) if ((*it)[seq1] != (*it)[seq2]) { @@ -336,7 +334,7 @@ Alignment *Alignment::removeIdenticalSeq(string not_remove, bool keep_two, StrVe for (int seq1=0; seq1 0) { - if (removed_seqs.size() >= getNSeq()-3) + if (removed_seqs.size() + 3 >= getNSeq()) outWarning("Your alignment contains too many identical sequences!"); IntVector keep_seqs; - for (int seq1 = 0; seq1 < getNSeq(); seq1++) - if (!removed[seq1]) keep_seqs.push_back(seq1); + for (size_t seq1 = 0; seq1 < getNSeq(); seq1++) + if (!removed[seq1]) { + keep_seqs.emplace_back(seq1); + } Alignment *aln = new Alignment; aln->extractSubAlignment(this, keep_seqs, 0); //cout << "NOTE: Identified " << removed_seqs.size() @@ -397,7 +397,21 @@ Alignment *Alignment::removeIdenticalSeq(string not_remove, bool keep_two, StrVe } else return this; } -bool Alignment::isGapOnlySeq(int seq_id) { +void Alignment::adjustHash(StateType v, size_t& hash) const { + //Based on what boost::hash_combine() does. + //For now there's no need for a templated version + //in a separate header file. But if other classes start + //wanting to "roll their own hashing" this should move + //to, say, utils/hashing.h. + hash ^= std::hash()(v) + 0x9e3779b9 + + (hash<<6) + (hash>>2); +} +void Alignment::adjustHash(bool v, size_t& hash) const { + hash ^= std::hash()(v) + 0x9e3779b9 + + (hash<<6) + (hash>>2); +} + +bool Alignment::isGapOnlySeq(size_t seq_id) { ASSERT(seq_id < getNSeq()); for (iterator it = begin(); it != end(); it++) if ((*it)[seq_id] != STATE_UNKNOWN) { @@ -408,8 +422,8 @@ bool Alignment::isGapOnlySeq(int seq_id) { Alignment *Alignment::removeGappySeq() { IntVector keep_seqs; - int i, nseq = getNSeq(); - for (i = 0; i < nseq; i++) + size_t nseq = getNSeq(); + for (size_t i = 0; i < nseq; i++) if (! isGapOnlySeq(i)) { keep_seqs.push_back(i); } @@ -417,7 +431,7 @@ Alignment *Alignment::removeGappySeq() { return this; // 2015-12-03: if resulting alignment has too few seqs, try to add some back if (keep_seqs.size() < 3 && getNSeq() >= 3) { - for (i = 0; i < nseq && keep_seqs.size() < 3; i++) + for (size_t i = 0; i < nseq && keep_seqs.size() < 3; i++) if (isGapOnlySeq(i)) keep_seqs.push_back(i); } @@ -427,9 +441,9 @@ Alignment *Alignment::removeGappySeq() { } void Alignment::checkGappySeq(bool force_error) { - int nseq = getNSeq(), i; + size_t nseq = getNSeq(); int wrong_seq = 0; - for (i = 0; i < nseq; i++) + for (size_t i = 0; i < nseq; i++) if (isGapOnlySeq(i)) { outWarning("Sequence " + getSeqName(i) + " contains only gaps or missing data"); wrong_seq++; @@ -923,8 +937,8 @@ void Alignment::computeConst(Pattern &pat) { void Alignment::printSiteInfo(ostream &out, int part_id) { - int nsite = getNSite(); - for (int site = 0; site != nsite; site++) { + size_t nsite = getNSite(); + for (size_t site = 0; site != nsite; site++) { Pattern ptn = getPattern(site); if (part_id >= 0) out << part_id << "\t"; @@ -1007,27 +1021,28 @@ void Alignment::addConstPatterns(char *freq_const_patterns) { if (vec.size() != num_states) outError("Const pattern frequency vector has different number of states: ", freq_const_patterns); - int nsite = getNSite(), orig_nsite = getNSite(); - int i; - for (i = 0; i < vec.size(); i++) { + size_t nsite = getNSite(); + size_t orig_nsite = getNSite(); + for (size_t i = 0; i < vec.size(); i++) { nsite += vec[i]; if (vec[i] < 0) outError("Const pattern frequency must be non-negative"); } site_pattern.resize(nsite, -1); - int nseq = getNSeq(); + size_t nseq = getNSeq(); nsite = orig_nsite; - for (i = 0; i < vec.size(); i++) if (vec[i] > 0) { - Pattern pat; - pat.resize(nseq, i); -// if (pattern_index.find(pat) != pattern_index.end()) { -// outWarning("Constant pattern of all " + convertStateBackStr(i) + " already exists"); -// } - for (int j = 0; j < vec[i]; j++) - addPattern(pat, nsite++, 1); - } + for (size_t i = 0; i < vec.size(); i++) { + if (vec[i] > 0) { + Pattern pat; + pat.resize(nseq, i); + //if (pattern_index.find(pat) != pattern_index.end()) { + // outWarning("Constant pattern of all " + convertStateBackStr(i) + " already exists"); + //} + for (int j = 0; j < vec[i]; j++) + addPattern(pat, nsite++, 1); + } + } countConstSite(); -// buildSeqStates(); } void Alignment::orderPatternByNumChars(int pat_type) { @@ -1107,7 +1122,7 @@ void Alignment::ungroupSitePattern() { vector stored_pat = (*this); clear(); - for (int i = 0; i < getNSite(); i++) { + for (size_t i = 0; i < getNSite(); ++i) { Pattern pat = stored_pat[getPatternID(i)]; pat.frequency = 1; push_back(pat); @@ -1123,10 +1138,10 @@ void Alignment::regroupSitePattern(int groups, IntVector& site_group) clear(); site_pattern.clear(); site_pattern.resize(stored_site_pattern.size(), -1); - int count = 0; + size_t count = 0; for (int g = 0; g < groups; g++) { pattern_index.clear(); - for (int i = 0; i < site_group.size(); i++) + for (size_t i = 0; i < site_group.size(); ++i) if (site_group[i] == g) { count++; Pattern pat = stored_pat[stored_site_pattern[i]]; @@ -1135,7 +1150,7 @@ void Alignment::regroupSitePattern(int groups, IntVector& site_group) } ASSERT(count == stored_site_pattern.size()); count = 0; - for (iterator it = begin(); it != end(); it++) + for (iterator it = begin(); it != end(); ++it) count += it->frequency; ASSERT(count == getNSite()); pattern_index.clear(); @@ -2849,15 +2864,14 @@ void Alignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_t site_pattern.resize(aln->getNSite(), -1); clear(); pattern_index.clear(); - int site = 0, removed_sites = 0; + size_t removed_sites = 0; VerboseMode save_mode = verbose_mode; verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern -// for (iterator pit = aln->begin(); pit != aln->end(); pit++) { - for (site = 0; site < aln->getNSite(); site++) { + for (size_t site = 0; site < aln->getNSite(); ++site) { iterator pit = aln->begin() + (aln->getPatternID(site)); Pattern pat; int true_char = 0; - for (it = seq_id.begin(); it != seq_id.end(); it++) { + for (it = seq_id.begin(); it != seq_id.end(); ++it) { char ch = (*pit)[*it]; if (ch != STATE_UNKNOWN) true_char++; pat.push_back(ch); @@ -2866,8 +2880,6 @@ void Alignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_t removed_sites++; else addPattern(pat, site-removed_sites); -// for (int i = 0; i < (*pit).frequency; i++) -// site_pattern[site++] = size()-1; } site_pattern.resize(aln->getNSite() - removed_sites); verbose_mode = save_mode; @@ -2880,8 +2892,7 @@ void Alignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_t void Alignment::extractPatterns(Alignment *aln, IntVector &ptn_id) { - int i; - for (i = 0; i < aln->getNSeq(); i++) { + for (size_t i = 0; i < aln->getNSeq(); ++i) { seq_names.push_back(aln->getSeqName(i)); } name = aln->name; @@ -2905,7 +2916,7 @@ void Alignment::extractPatterns(Alignment *aln, IntVector &ptn_id) { int site = 0; VerboseMode save_mode = verbose_mode; verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern - for (i = 0; i != ptn_id.size(); i++) { + for (size_t i = 0; i != ptn_id.size(); ++i) { ASSERT(ptn_id[i] >= 0 && ptn_id[i] < aln->getNPattern()); Pattern pat = aln->at(ptn_id[i]); addPattern(pat, site, aln->at(ptn_id[i]).frequency); @@ -2920,9 +2931,8 @@ void Alignment::extractPatterns(Alignment *aln, IntVector &ptn_id) { } void Alignment::extractPatternFreqs(Alignment *aln, IntVector &ptn_freq) { - int i; ASSERT(ptn_freq.size() <= aln->getNPattern()); - for (i = 0; i < aln->getNSeq(); i++) { + for (size_t i = 0; i < aln->getNSeq(); ++i) { seq_names.push_back(aln->getSeqName(i)); } name = aln->name; @@ -2946,7 +2956,7 @@ void Alignment::extractPatternFreqs(Alignment *aln, IntVector &ptn_freq) { int site = 0; VerboseMode save_mode = verbose_mode; verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern - for (i = 0; i != ptn_freq.size(); i++) + for (size_t i = 0; i != ptn_freq.size(); ++i) if (ptn_freq[i]) { ASSERT(ptn_freq[i] > 0); Pattern pat = aln->at(i); @@ -2962,8 +2972,7 @@ void Alignment::extractPatternFreqs(Alignment *aln, IntVector &ptn_freq) { } void Alignment::extractSites(Alignment *aln, IntVector &site_id) { - int i; - for (i = 0; i < aln->getNSeq(); i++) { + for (size_t i = 0; i < aln->getNSeq(); ++i) { seq_names.push_back(aln->getSeqName(i)); } name = aln->name; @@ -2986,7 +2995,7 @@ void Alignment::extractSites(Alignment *aln, IntVector &site_id) { pattern_index.clear(); VerboseMode save_mode = verbose_mode; verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern - for (i = 0; i != site_id.size(); i++) { + for (size_t i = 0; i != site_id.size(); i++) { Pattern pat = aln->getPattern(site_id[i]); addPattern(pat, i); } @@ -2994,9 +3003,11 @@ void Alignment::extractSites(Alignment *aln, IntVector &site_id) { countConstSite(); // buildSeqStates(); // sanity check - for (iterator it = begin(); it != end(); it++) - if (it->at(0) == -1) + for (iterator it = begin(); it != end(); it++) { + if (it->at(0) == -1) { ASSERT(0); + } + } //cout << getNSite() << " positions were extracted" << endl; //cout << __func__ << " " << num_states << endl; @@ -3009,9 +3020,8 @@ void Alignment::convertToCodonOrAA(Alignment *aln, char *gene_code_id, bool nt2a outError("Cannot convert non-DNA alignment into codon alignment"); if (aln->getNSite() % 3 != 0) outError("Sequence length is not divisible by 3 when converting to codon sequences"); - int i, site; char AA_to_state[NUM_CHAR]; - for (i = 0; i < aln->getNSeq(); i++) { + for (size_t i = 0; i < aln->getNSeq(); i++) { seq_names.push_back(aln->getSeqName(i)); } name = aln->name; @@ -3040,15 +3050,15 @@ void Alignment::convertToCodonOrAA(Alignment *aln, char *gene_code_id, bool nt2a VerboseMode save_mode = verbose_mode; verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern - int nsite = aln->getNSite(); - int nseq = aln->getNSeq(); + size_t nsite = aln->getNSite(); + size_t nseq = aln->getNSeq(); Pattern pat; pat.resize(nseq); int num_error = 0; ostringstream err_str; - for (site = 0; site < nsite; site+=step) { - for (int seq = 0; seq < nseq; seq++) { + for (size_t site = 0; site < nsite; site+=step) { + for (size_t seq = 0; seq < nseq; ++seq) { //char state = convertState(sequences[seq][site], seq_type); char state = aln->at(aln->getPatternID(site))[seq]; // special treatment for codon @@ -3107,9 +3117,8 @@ Alignment *Alignment::convertCodonToAA() { Alignment *res = new Alignment; if (seq_type != SEQ_CODON) outError("Cannot convert non-codon alignment into AA"); - int i, site; char AA_to_state[NUM_CHAR]; - for (i = 0; i < getNSeq(); i++) { + for (size_t i = 0; i < getNSeq(); ++i) { res->seq_names.push_back(getSeqName(i)); } res->name = name; @@ -3130,13 +3139,13 @@ Alignment *Alignment::convertCodonToAA() { VerboseMode save_mode = verbose_mode; verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern - int nsite = getNSite(); - int nseq = getNSeq(); + size_t nsite = getNSite(); + size_t nseq = getNSeq(); Pattern pat; pat.resize(nseq); - for (site = 0; site < nsite; site++) { - for (int seq = 0; seq < nseq; seq++) { + for (size_t site = 0; site < nsite; ++site) { + for (size_t seq = 0; seq < nseq; ++seq) { StateType state = at(getPatternID(site))[seq]; if (state == STATE_UNKNOWN) state = res->STATE_UNKNOWN; @@ -3148,7 +3157,6 @@ Alignment *Alignment::convertCodonToAA() { } verbose_mode = save_mode; res->countConstSite(); -// res->buildSeqStates(); return res; } @@ -3156,8 +3164,7 @@ Alignment *Alignment::convertCodonToDNA() { Alignment *res = new Alignment; if (seq_type != SEQ_CODON) outError("Cannot convert non-codon alignment into DNA"); - int i, site; - for (i = 0; i < getNSeq(); i++) { + for (size_t i = 0; i < getNSeq(); ++i) { res->seq_names.push_back(getSeqName(i)); } res->name = name; @@ -3176,19 +3183,18 @@ Alignment *Alignment::convertCodonToDNA() { VerboseMode save_mode = verbose_mode; verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern - int nsite = getNSite(); - int nseq = getNSeq(); + size_t nsite = getNSite(); + size_t nseq = getNSeq(); Pattern pat[3]; pat[0].resize(nseq); pat[1].resize(nseq); pat[2].resize(nseq); - for (site = 0; site < nsite; site++) { - int i; - for (int seq = 0; seq < nseq; seq++) { + for (size_t site = 0; site < nsite; ++site) { + for (size_t seq = 0; seq < nseq; ++seq) { StateType state = at(getPatternID(site))[seq]; if (state == STATE_UNKNOWN) { - for (i = 0; i < 3; i++) + for (int i = 0; i < 3; ++i) pat[i][seq] = res->STATE_UNKNOWN; } else { state = codon_table[state]; @@ -3197,7 +3203,7 @@ Alignment *Alignment::convertCodonToDNA() { pat[2][seq] = state%4; } } - for (i = 0; i < 3; i++) + for (int i = 0; i < 3; ++i) res->addPattern(pat[i], site*3+i); } verbose_mode = save_mode; @@ -3316,7 +3322,7 @@ void Alignment::createBootstrapAlignment(Alignment *aln, IntVector* pattern_freq sequence_type = aln->sequence_type; position_spec = aln->position_spec; aln_file = aln->aln_file; - int site, nsite = aln->getNSite(); + size_t nsite = aln->getNSite(); seq_names.insert(seq_names.begin(), aln->seq_names.begin(), aln->seq_names.end()); num_states = aln->num_states; seq_type = aln->seq_type; @@ -3361,29 +3367,30 @@ void Alignment::createBootstrapAlignment(Alignment *aln, IntVector* pattern_freq int added_sites = 0; IntVector sample; random_resampling(nsite, sample); - for (site = 0; site < nsite; site++) - for (int rep = 0; rep < sample[site]; rep++) { - int ptn_id = aln->getPatternID(site); - Pattern pat = aln->at(ptn_id); - int nptn = getNPattern(); - addPattern(pat, added_sites); - if (!aln->site_state_freq.empty() && getNPattern() > nptn) { - // a new pattern is added, copy state frequency vector - double *state_freq = new double[num_states]; - memcpy(state_freq, aln->site_state_freq[ptn_id], num_states*sizeof(double)); - site_state_freq.push_back(state_freq); + for (size_t site = 0; site < nsite; ++site) { + for (int rep = 0; rep < sample[site]; ++rep) { + int ptn_id = aln->getPatternID(site); + Pattern pat = aln->at(ptn_id); + int nptn = getNPattern(); + addPattern(pat, added_sites); + if (!aln->site_state_freq.empty() && getNPattern() > nptn) { + // a new pattern is added, copy state frequency vector + double *state_freq = new double[num_states]; + memcpy(state_freq, aln->site_state_freq[ptn_id], num_states*sizeof(double)); + site_state_freq.push_back(state_freq); + } + if (pattern_freq) ((*pattern_freq)[ptn_id])++; + added_sites++; } - if (pattern_freq) ((*pattern_freq)[ptn_id])++; - added_sites++; - } + } if (added_sites < nsite) site_pattern.resize(added_sites); } else if (strncmp(spec, "GENESITE,", 9) == 0) { // resampling genes, then resampling sites within resampled genes convert_int_vec(spec+9, site_vec); - int i; IntVector begin_site; - for (i = 0, site = 0; i < site_vec.size(); i++) { + size_t site = 0; + for (size_t i = 0; i < site_vec.size(); ++i) { begin_site.push_back(site); site += site_vec[i]; //cout << "site = " << site_vec[i] << endl; @@ -3391,9 +3398,9 @@ void Alignment::createBootstrapAlignment(Alignment *aln, IntVector* pattern_freq if (site > getNSite()) outError("Sum of lengths exceeded alignment length"); - for (i = 0; i < site_vec.size(); i++) { + for (size_t i = 0; i < site_vec.size(); ++i) { int part = random_int(site_vec.size()); - for (int j = 0; j < site_vec[part]; j++) { + for (int j = 0; j < site_vec[part]; ++j) { site = random_int(site_vec[part]) + begin_site[part]; int ptn = aln->getPatternID(site); Pattern pat = aln->at(ptn); @@ -3404,9 +3411,9 @@ void Alignment::createBootstrapAlignment(Alignment *aln, IntVector* pattern_freq } else if (strncmp(spec, "GENE,", 5) == 0) { // resampling genes instead of sites convert_int_vec(spec+5, site_vec); - int i; + size_t site = 0; IntVector begin_site; - for (i = 0, site = 0; i < site_vec.size(); i++) { + for (size_t i = 0; i < site_vec.size(); ++i) { begin_site.push_back(site); site += site_vec[i]; //cout << "site = " << site_vec[i] << endl; @@ -3414,7 +3421,7 @@ void Alignment::createBootstrapAlignment(Alignment *aln, IntVector* pattern_freq if (site > getNSite()) outError("Sum of lengths exceeded alignment length"); - for (i = 0; i < site_vec.size(); i++) { + for (size_t i = 0; i < site_vec.size(); ++i) { int part = random_int(site_vec.size()); for (site = begin_site[part]; site < begin_site[part] + site_vec[part]; site++) { int ptn = aln->getPatternID(site); @@ -3429,14 +3436,14 @@ void Alignment::createBootstrapAlignment(Alignment *aln, IntVector* pattern_freq if (site_vec.size() % 2 != 0) outError("Bootstrap specification length is not divisible by 2"); nsite = 0; - int part, begin_site = 0, out_site = 0; - for (part = 0; part < site_vec.size(); part+=2) + int begin_site = 0, out_site = 0; + for (size_t part = 0; part < site_vec.size(); part+=2) nsite += site_vec[part+1]; site_pattern.resize(nsite, -1); - for (part = 0; part < site_vec.size(); part += 2) { + for (size_t part = 0; part < site_vec.size(); part+=2) { if (begin_site + site_vec[part] > aln->getNSite()) outError("Sum of lengths exceeded alignment length"); - for (site = 0; site < site_vec[part+1]; site++) { + for (size_t site = 0; site < site_vec[part+1]; ++site) { int site_id = random_int(site_vec[part]) + begin_site; int ptn_id = aln->getPatternID(site_id); Pattern pat = aln->at(ptn_id); @@ -3467,7 +3474,7 @@ void Alignment::createBootstrapAlignment(IntVector &pattern_freq, const char *sp } void Alignment::createBootstrapAlignment(int *pattern_freq, const char *spec, int *rstream) { - int site, nsite = getNSite(); + size_t nsite = getNSite(); memset(pattern_freq, 0, getNPattern()*sizeof(int)); IntVector site_vec; if (Params::getInstance().jackknife_prop > 0.0 && spec) @@ -3477,33 +3484,33 @@ void Alignment::createBootstrapAlignment(int *pattern_freq, const char *spec, in // multi-scale bootstrapping called by AU test int orig_nsite = nsite; double scale = convert_double(spec+6); - nsite = (int)round(scale * nsite); - for (site = 0; site < nsite; site++) { + nsite = (size_t)round(scale * nsite); + for (size_t site = 0; site < nsite; site++) { int site_id = random_int(orig_nsite, rstream); int ptn_id = getPatternID(site_id); pattern_freq[ptn_id]++; } } else if (!spec) { - int nptn = getNPattern(); + size_t nptn = getNPattern(); if (nsite/8 < nptn || Params::getInstance().jackknife_prop > 0.0) { IntVector sample; random_resampling(nsite, sample, rstream); - for (site = 0; site < nsite; site++) - for (int rep = 0; rep < sample[site]; rep++) { - int ptn_id = getPatternID(site); - pattern_freq[ptn_id]++; + for (size_t site = 0; site < nsite; site++) { + for (int rep = 0; rep < sample[site]; rep++) { + int ptn_id = getPatternID(site); + pattern_freq[ptn_id]++; + } } } else { // BQM 2015-12-27: use multinomial sampling for faster generation if #sites is much larger than #patterns - int ptn; double *prob = new double[nptn]; - for (ptn = 0; ptn < nptn; ptn++) + for (size_t ptn = 0; ptn < nptn; ++ptn) prob[ptn] = at(ptn).frequency; gsl_ran_multinomial(nptn, nsite, prob, (unsigned int*)pattern_freq, rstream); int sum = 0; - for (ptn = 0; ptn < nptn; ptn++) + for (size_t ptn = 0; ptn < nptn; ++ptn) sum += pattern_freq[ptn]; ASSERT(sum == nsite); delete [] prob; @@ -3511,17 +3518,17 @@ void Alignment::createBootstrapAlignment(int *pattern_freq, const char *spec, in } else if (strncmp(spec, "GENESITE,", 9) == 0) { // resampling genes, then resampling sites within resampled genes convert_int_vec(spec+9, site_vec); - int i; IntVector begin_site; - for (i = 0, site = 0; i < site_vec.size(); i++) { + size_t site = 0; + for (size_t i = 0; i < site_vec.size(); ++i) { begin_site.push_back(site); site += site_vec[i]; //cout << "site = " << site_vec[i] << endl; } - if (site > getNSite()) + if (site > getNSite()) { outError("Sum of lengths exceeded alignment length"); - - for (i = 0; i < site_vec.size(); i++) { + } + for (size_t i = 0; i < site_vec.size(); ++i) { int part = random_int(site_vec.size(), rstream); for (int j = 0; j < site_vec[part]; j++) { site = random_int(site_vec[part], rstream) + begin_site[part]; @@ -3532,19 +3539,19 @@ void Alignment::createBootstrapAlignment(int *pattern_freq, const char *spec, in } else if (strncmp(spec, "GENE,", 5) == 0) { // resampling genes instead of sites convert_int_vec(spec+5, site_vec); - int i; IntVector begin_site; - for (i = 0, site = 0; i < site_vec.size(); i++) { - begin_site.push_back(site); + size_t site = 0; + for (size_t i = 0; i < site_vec.size(); ++i) { + begin_site.emplace_back(site); site += site_vec[i]; //cout << "site = " << site_vec[i] << endl; } - if (site > getNSite()) + if (site > getNSite()) { outError("Sum of lengths exceeded alignment length"); - - for (i = 0; i < site_vec.size(); i++) { + } + for (size_t i = 0; i < site_vec.size(); ++i) { int part = random_int(site_vec.size(), rstream); - for (site = begin_site[part]; site < begin_site[part] + site_vec[part]; site++) { + for (size_t site = begin_site[part]; site < begin_site[part] + site_vec[part]; ++site) { int ptn = getPatternID(site); pattern_freq[ptn]++; } @@ -3558,11 +3565,11 @@ void Alignment::createBootstrapAlignment(int *pattern_freq, const char *spec, in } if (site_vec.size() % 2 != 0) outError("Bootstrap specification length is not divisible by 2"); - int part, begin_site = 0, out_site = 0; - for (part = 0; part < site_vec.size(); part += 2) { + int begin_site = 0, out_site = 0; + for (size_t part = 0; part < site_vec.size(); part += 2) { if (begin_site + site_vec[part] > getNSite()) outError("Sum of lengths exceeded alignment length"); - for (site = 0; site < site_vec[part+1]; site++) { + for (size_t site = 0; site < site_vec[part+1]; ++site) { int site_id = random_int(site_vec[part], rstream) + begin_site; int ptn_id = getPatternID(site_id); pattern_freq[ptn_id]++; @@ -3575,7 +3582,7 @@ void Alignment::createBootstrapAlignment(int *pattern_freq, const char *spec, in void Alignment::buildFromPatternFreq(Alignment & aln, IntVector new_pattern_freqs){ - int nsite = aln.getNSite(); + size_t nsite = aln.getNSite(); seq_names.insert(seq_names.begin(), aln.seq_names.begin(), aln.seq_names.end()); name = aln.name; model_name = aln.model_name; @@ -3616,10 +3623,14 @@ void Alignment::buildFromPatternFreq(Alignment & aln, IntVector new_pattern_freq void Alignment::createGapMaskedAlignment(Alignment *masked_aln, Alignment *aln) { - if (masked_aln->getNSeq() != aln->getNSeq()) outError("Different number of sequences in masked alignment"); - if (masked_aln->getNSite() != aln->getNSite()) outError("Different number of sites in masked alignment"); - - int site, nsite = aln->getNSite(), nseq = aln->getNSeq(); + if (masked_aln->getNSeq() != aln->getNSeq()) { + outError("Different number of sequences in masked alignment"); + } + if (masked_aln->getNSite() != aln->getNSite()) { + outError("Different number of sites in masked alignment"); + } + size_t nsite = aln->getNSite(); + size_t nseq = aln->getNSeq(); seq_names.insert(seq_names.begin(), aln->seq_names.begin(), aln->seq_names.end()); name = aln->name; model_name = aln->model_name; @@ -3647,17 +3658,19 @@ void Alignment::createGapMaskedAlignment(Alignment *masked_aln, Alignment *aln) } VerboseMode save_mode = verbose_mode; verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern - for (site = 0; site < nsite; site++) { + for (size_t site = 0; site < nsite; ++site) { int ptn_id = aln->getPatternID(site); Pattern pat = aln->at(ptn_id); Pattern masked_pat = masked_aln->at(masked_aln->getPatternID(site)); - for (int seq = 0; seq < nseq; seq++) - if (masked_pat[name_map[seq]] == STATE_UNKNOWN) pat[seq] = STATE_UNKNOWN; + for (size_t seq = 0; seq < nseq; ++seq) { + if (masked_pat[name_map[seq]] == STATE_UNKNOWN) { + pat[seq] = STATE_UNKNOWN; + } + } addPattern(pat, site); } verbose_mode = save_mode; countConstSite(); -// buildSeqStates(); } void Alignment::shuffleAlignment() { @@ -3667,33 +3680,42 @@ void Alignment::shuffleAlignment() { void Alignment::concatenateAlignment(Alignment *aln) { - if (getNSeq() != aln->getNSeq()) outError("Different number of sequences in two alignments"); - if (num_states != aln->num_states) outError("Different number of states in two alignments"); - if (seq_type != aln->seq_type) outError("Different data type in two alignments"); - int site, nsite = aln->getNSite(); - int cur_sites = getNSite(); + if (getNSeq() != aln->getNSeq()) { + outError("Different number of sequences in two alignments"); + } + if (num_states != aln->num_states) { + outError("Different number of states in two alignments"); + } + if (seq_type != aln->seq_type) { + outError("Different data type in two alignments"); + } + size_t nsite = aln->getNSite(); + size_t cur_sites = getNSite(); site_pattern.resize(cur_sites + nsite , -1); IntVector name_map; for (StrVector::iterator it = seq_names.begin(); it != seq_names.end(); it++) { int seq_id = aln->getSeqID(*it); - if (seq_id < 0) outError("The other alignment does not contain taxon ", *it); + if (seq_id < 0) { + outError("The other alignment does not contain taxon ", *it); + } name_map.push_back(seq_id); } VerboseMode save_mode = verbose_mode; verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern - for (site = 0; site < nsite; site++) { + for (size_t site = 0; site < nsite; site++) { Pattern pat = aln->at(aln->getPatternID(site)); Pattern new_pat = pat; - for (int i = 0; i < name_map.size(); i++) new_pat[i] = pat[name_map[i]]; + for (size_t i = 0; i < name_map.size(); i++) { + new_pat[i] = pat[name_map[i]]; + } addPattern(new_pat, site + cur_sites); } verbose_mode = save_mode; countConstSite(); -// buildSeqStates(); } void Alignment::copyAlignment(Alignment *aln) { - int site, nsite = aln->getNSite(); + size_t nsite = aln->getNSite(); seq_names.insert(seq_names.begin(), aln->seq_names.begin(), aln->seq_names.end()); name = aln->name; model_name = aln->model_name; @@ -3715,7 +3737,7 @@ void Alignment::copyAlignment(Alignment *aln) { pattern_index.clear(); VerboseMode save_mode = verbose_mode; verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern - for (site = 0; site < nsite; site++) { + for (size_t site = 0; site < nsite; ++site) { int site_id = site; int ptn_id = aln->getPatternID(site_id); Pattern pat = aln->at(ptn_id); @@ -3766,7 +3788,7 @@ void generateSubsets(vector &inset, vector > &subsets) { } void Alignment::generateUninfPatterns(StateType repeat, vector &singleton, vector &seq_pos, vector &unobserved_ptns) { - int seqs = getNSeq(); + size_t seqs = getNSeq(); if (seq_pos.size() == singleton.size()) { Pattern pat; pat.resize(seqs, repeat); @@ -3775,7 +3797,7 @@ void Alignment::generateUninfPatterns(StateType repeat, vector &singl unobserved_ptns.push_back(pat); return; } - for (int seq = 0; seq < seqs; seq++) { + for (size_t seq = 0; seq < seqs; seq++) { bool dup = false; for (auto s: seq_pos) if (seq == s) { dup = true; break; } @@ -3792,16 +3814,16 @@ void Alignment::getUnobservedConstPatterns(ASCType ASC_type, vector &un case ASC_VARIANT: { // Lewis's correction for variant sites unobserved_ptns.reserve(num_states); - for (StateType state = 0; state < num_states; state++) - if (!isStopCodon(state)) - { + for (StateType state = 0; state < num_states; state++) { + if (!isStopCodon(state)) { Pattern pat; pat.resize(getNSeq(), state); if (pattern_index.find(pat) == pattern_index.end()) { // constant pattern is unobserved unobserved_ptns.push_back(pat); } - } + } + } break; } case ASC_VARIANT_MISSING: { @@ -3918,22 +3940,18 @@ double Alignment::computeJCDist(int seq1, int seq2) { } void Alignment::printDist(ostream &out, double *dist_mat) { - int nseqs = getNSeq(); + size_t nseqs = getNSeq(); int max_len = getMaxSeqNameLength(); if (max_len < 10) max_len = 10; out << nseqs << endl; - int pos = 0; + size_t pos = 0; out.precision(max((int)ceil(-log10(Params::getInstance().min_branch_length))+1, 6)); out << fixed; - for (int seq1 = 0; seq1 < nseqs; seq1 ++) { + for (size_t seq1 = 0; seq1 < nseqs; ++seq1) { out.width(max_len); out << left << getSeqName(seq1) << " "; - for (int seq2 = 0; seq2 < nseqs; seq2 ++) { + for (size_t seq2 = 0; seq2 < nseqs; ++seq2) { out << dist_mat[pos++]; - /*if (seq2 % 7 == 6) { - out << endl; - out.width(max_len+1); - } */ out << " "; } out << endl; @@ -3955,7 +3973,7 @@ void Alignment::printDist(const char *file_name, double *dist_mat) { double Alignment::readDist(istream &in, double *dist_mat) { double longest_dist = 0.0; - int nseqs; + size_t nseqs; in >> nseqs; if (nseqs != getNSeq()) throw "Distance file has different number of taxa"; @@ -4193,11 +4211,10 @@ void Alignment::computeAbsoluteStateFreq(unsigned int *abs_state_freq) { void Alignment::countStatePerSequence (unsigned *count_per_sequence) { - int i; - int nseqs = getNSeq(); + size_t nseqs = getNSeq(); memset(count_per_sequence, 0, sizeof(unsigned)*num_states*nseqs); for (iterator it = begin(); it != end(); it++) - for (i = 0; i != nseqs; i++) { + for (size_t i = 0; i != nseqs; ++i) { int state = convertPomoState(it->at(i)); if (state < num_states) { count_per_sequence[i*num_states + state] += it->frequency; @@ -4206,25 +4223,22 @@ void Alignment::countStatePerSequence (unsigned *count_per_sequence) { } void Alignment::computeStateFreqPerSequence (double *freq_per_sequence) { - int i, j; - int nseqs = getNSeq(); + size_t nseqs = getNSeq(); double *states_app = new double[num_states*(STATE_UNKNOWN+1)]; double *new_freq = new double[num_states]; unsigned *state_count = new unsigned[(STATE_UNKNOWN+1)*nseqs]; double *new_state_freq = new double[num_states]; - - memset(state_count, 0, sizeof(unsigned)*(STATE_UNKNOWN+1)*nseqs); - for (i = 0; i <= STATE_UNKNOWN; i++) + for (int i = 0; i <= STATE_UNKNOWN; i++) getAppearance(i, &states_app[i*num_states]); for (iterator it = begin(); it != end(); it++) - for (i = 0; i != nseqs; i++) { + for (size_t i = 0; i != nseqs; i++) { state_count[i*(STATE_UNKNOWN+1) + it->at(i)] += it->frequency; } double equal_freq = 1.0/num_states; - for (i = 0; i < num_states*nseqs; i++) + for (size_t i = 0; i < num_states*nseqs; i++) freq_per_sequence[i] = equal_freq; const int NUM_TIME = 8; @@ -4232,24 +4246,24 @@ void Alignment::computeStateFreqPerSequence (double *freq_per_sequence) { for (int seq = 0; seq < nseqs; seq++) { double *state_freq = &freq_per_sequence[seq*num_states]; memset(new_state_freq, 0, sizeof(double)*num_states); - for (i = 0; i <= STATE_UNKNOWN; i++) { + for (int i = 0; i <= STATE_UNKNOWN; i++) { if (state_count[seq*(STATE_UNKNOWN+1)+i] == 0) continue; double sum_freq = 0.0; - for (j = 0; j < num_states; j++) { + for (int j = 0; j < num_states; j++) { new_freq[j] = state_freq[j] * states_app[i*num_states+j]; sum_freq += new_freq[j]; } sum_freq = 1.0/sum_freq; - for (j = 0; j < num_states; j++) { + for (int j = 0; j < num_states; j++) { new_state_freq[j] += new_freq[j]*sum_freq*state_count[seq*(STATE_UNKNOWN+1)+i]; } } double sum_freq = 0.0; - for (j = 0; j < num_states; j++) + for (int j = 0; j < num_states; j++) sum_freq += new_state_freq[j]; sum_freq = 1.0/sum_freq; - for (j = 0; j < num_states; j++) + for (int j = 0; j < num_states; j++) state_freq[j] = new_state_freq[j]*sum_freq; } } @@ -4425,8 +4439,7 @@ void Alignment::getAppearance(StateType state, StateBitset &state_app) { } void Alignment::computeCodonFreq(StateFreqType freq, double *state_freq, double *ntfreq) { - int nseqs = getNSeq(); - int i, j; + size_t nseqs = getNSeq(); if (freq == FREQ_CODON_1x4) { memset(ntfreq, 0, sizeof(double)*4); @@ -4443,19 +4456,19 @@ void Alignment::computeCodonFreq(StateFreqType freq, double *state_freq, double } } double sum = 0; - for (i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) sum += ntfreq[i]; - for (i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) ntfreq[i] /= sum; if (verbose_mode >= VB_MED) { - for (i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) cout << " " << symbols_dna[i] << ": " << ntfreq[i]; cout << endl; } memcpy(ntfreq+4, ntfreq, sizeof(double)*4); memcpy(ntfreq+8, ntfreq, sizeof(double)*4); sum = 0.0; - for (i = 0; i < num_states; i++) { + for (int i = 0; i < num_states; i++) { int codon = codon_table[i]; state_freq[i] = ntfreq[codon/16] * ntfreq[(codon%16)/4] * ntfreq[codon%4]; if (isStopCodon(i)) { @@ -4467,11 +4480,11 @@ void Alignment::computeCodonFreq(StateFreqType freq, double *state_freq, double } // sum = (1.0-sum)/(1.0-sum_stop); sum = 1.0/sum; - for (i = 0; i < num_states; i++) + for (int i = 0; i < num_states; i++) if (!isStopCodon(i)) state_freq[i] *= sum; sum = 0.0; - for (i = 0; i < num_states; i++) + for (int i = 0; i < num_states; i++) sum += state_freq[i]; ASSERT(fabs(sum-1.0)<1e-5); } else if (freq == FREQ_CODON_3x4) { @@ -4489,14 +4502,14 @@ void Alignment::computeCodonFreq(StateFreqType freq, double *state_freq, double ntfreq[8+nt3] += (*it).frequency; } } - for (j = 0; j < 12; j+=4) { + for (int j = 0; j < 12; j+=4) { double sum = 0; - for (i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) sum += ntfreq[i+j]; - for (i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) ntfreq[i+j] /= sum; if (verbose_mode >= VB_MED) { - for (i = 0; i < 4; i++) + for (int i = 0; i < 4; i++) cout << " " << symbols_dna[i] << ": " << ntfreq[i+j]; cout << endl; } @@ -4504,7 +4517,7 @@ void Alignment::computeCodonFreq(StateFreqType freq, double *state_freq, double // double sum_stop=0.0; double sum = 0.0; - for (i = 0; i < num_states; i++) { + for (int i = 0; i < num_states; i++) { int codon = codon_table[i]; state_freq[i] = ntfreq[codon/16] * ntfreq[4+(codon%16)/4] * ntfreq[8+codon%4]; if (isStopCodon(i)) { @@ -4516,11 +4529,11 @@ void Alignment::computeCodonFreq(StateFreqType freq, double *state_freq, double } // sum = (1.0-sum)/(1.0-sum_stop); sum = 1.0 / sum; - for (i = 0; i < num_states; i++) + for (int i = 0; i < num_states; i++) if (!isStopCodon(i)) state_freq[i] *= sum; sum = 0.0; - for (i = 0; i < num_states; i++) + for (int i = 0; i < num_states; i++) sum += state_freq[i]; ASSERT(fabs(sum-1.0)<1e-5); @@ -4564,9 +4577,9 @@ void Alignment::computeCodonFreq(StateFreqType freq, double *state_freq, double outError("F3X4C not yet implemented. Contact authors if you really need it."); } else if (freq == FREQ_EMPIRICAL || freq == FREQ_ESTIMATE) { memset(state_freq, 0, num_states*sizeof(double)); - i = 0; - for (iterator it = begin(); it != end(); it++, i++) - for (int seq = 0; seq < nseqs; seq++) { + int i = 0; + for (iterator it = begin(); it != end(); ++it, ++i) + for (size_t seq = 0; seq < nseqs; seq++) { int state = it->at(seq); if (state >= num_states) continue; state_freq[state] += it->frequency; @@ -4585,7 +4598,7 @@ void Alignment::computeCodonFreq(StateFreqType freq, double *state_freq, double void Alignment::computeDivergenceMatrix(double *pair_freq, double *state_freq, bool normalize) { int i, j; ASSERT(pair_freq); - int nseqs = getNSeq(); + size_t nseqs = getNSeq(); memset(pair_freq, 0, sizeof(double)*num_states*num_states); memset(state_freq, 0, sizeof(double)*num_states); @@ -4677,7 +4690,7 @@ std::ostream& operator<<(std::ostream& stream, const SymTestResult& res) { void Alignment::doSymTest(size_t vecid, vector &vec_sym, vector &vec_marsym, vector &vec_intsym, int *rstream, vector *stats) { - int nseq = getNSeq(); + size_t nseq = getNSeq(); const double chi2_cutoff = Params::getInstance().symtest_pcutoff; @@ -4881,11 +4894,11 @@ void Alignment::printSiteGaps(const char *filename) { out.open(filename); int nsite = getNSite(); out << nsite << endl << "Site_Gap "; - for (int site = 0; site < getNSite(); site++) { + for (size_t site = 0; site < getNSite(); ++site) { out << " " << at(getPatternID(site)).computeGapChar(num_states, STATE_UNKNOWN); } out << endl << "Site_Ambi "; - for (int site = 0; site < getNSite(); site++) { + for (size_t site = 0; site < getNSite(); ++site) { out << " " << at(getPatternID(site)).computeAmbiguousChar(num_states); } out << endl; @@ -4915,7 +4928,7 @@ void Alignment::multinomialProb(Alignment refAlign, double &prob) // cout << "Computing the probability of this alignment given the multinomial distribution determined by a reference alignment ..." << endl; //should we check for compatibility of sequence's names and sequence's order in THIS alignment and in the objectAlign?? //check alignment length - int nsite = getNSite(); + size_t nsite = getNSite(); ASSERT(nsite == refAlign.getNSite()); double sumFac = 0; double sumProb = 0; @@ -4947,7 +4960,7 @@ void Alignment::multinomialProb (DoubleVector logLL, double &prob) ASSERT(logLL.size() == patNum); - int alignLen = getNSite(); + size_t alignLen = getNSite(); //resize the expectedNorFre vector expectedNorFre.resize(patNum,-1); @@ -5075,8 +5088,8 @@ double Alignment::multinomialProb (IntVector &pattern_freq) //return expectedNorFre; //compute the probability of having expectedNorFre given the observed pattern frequencies of THIS alignment ASSERT(size() == pattern_freq.size()); - int patNum = getNPattern(); - int alignLen = getNSite(); + size_t patNum = getNPattern(); + size_t alignLen = getNSite(); double sumFac = 0; double sumProb = 0; double fac = logFac(alignLen); @@ -5092,10 +5105,9 @@ bool Alignment::readSiteStateFreq(const char* site_freq_file) { cout << endl << "Reading site-specific state frequency file " << site_freq_file << " ..." << endl; site_model.resize(getNSite(), -1); - int i; IntVector pattern_to_site; // vector from pattern to the first site pattern_to_site.resize(getNPattern(), -1); - for (i = 0; i < getNSite(); i++) + for (size_t i = 0; i < getNSite(); ++i) if (pattern_to_site[getPatternID(i)] == -1) pattern_to_site[getPatternID(i)] = i; @@ -5123,7 +5135,7 @@ bool Alignment::readSiteStateFreq(const char* site_freq_file) } double *site_freq_entry = new double[num_states]; double sum = 0; - for (i = 0; i < num_states; i++) { + for (int i = 0; i < num_states; ++i) { in >> freq; if (freq <= 0.0 || freq >= 1.0) throw "Frequencies must be strictly positive and smaller than 1"; site_freq_entry[i] = freq; @@ -5133,7 +5145,7 @@ bool Alignment::readSiteStateFreq(const char* site_freq_file) if (fabs(sum-1.0) > 1e-3) outWarning("Frequencies of site " + site_spec + " do not sum up to 1 and will be normalized"); sum = 1.0/sum; - for (i = 0; i < num_states; i++) + for (int i = 0; i < num_states; ++i) site_freq_entry[i] *= sum; } convfreq(site_freq_entry); // regularize frequencies (eg if some freq = 0) @@ -5144,7 +5156,7 @@ bool Alignment::readSiteStateFreq(const char* site_freq_file) // compare freq with prev_site bool matched_freq = true; double *prev_freq = site_state_freq[site_model[prev_site]]; - for (i = 0; i < num_states; i++) { + for (int i = 0; i < num_states; ++i) { if (site_freq_entry[i] != prev_freq[i]) { matched_freq = false; break; @@ -5165,7 +5177,7 @@ bool Alignment::readSiteStateFreq(const char* site_freq_file) aln_changed = true; // there are some unspecified sites cout << site_model.size() - specified_sites << " unspecified sites will get default frequencies" << endl; - for (i = 0; i < site_model.size(); i++) + for (size_t i = 0; i < site_model.size(); ++i) if (site_model[i] == -1) site_model[i] = site_state_freq.size(); site_state_freq.push_back(NULL); diff --git a/alignment/alignment.h b/alignment/alignment.h index 51a0b633e..b963a28fc 100644 --- a/alignment/alignment.h +++ b/alignment/alignment.h @@ -335,21 +335,21 @@ class Alignment : public vector, public CharSet, public StateSpace { /** @return number of sequences */ - inline int getNSeq() const { + inline size_t getNSeq() const { return seq_names.size(); } /** @return number of sites (alignment columns) */ - inline int getNSite() { + inline size_t getNSite() { return site_pattern.size(); } /** @return number of patterns */ - inline int getNPattern() { + inline size_t getNPattern() { return size(); } @@ -429,6 +429,14 @@ class Alignment : public vector, public CharSet, public StateSpace { */ virtual Alignment *removeIdenticalSeq(string not_remove, bool keep_two, StrVector &removed_seqs, StrVector &target_seqs); + /** + * calculating hashes for sequences + * @param v state at a given site, in the sequence being hashed + * @param hash running hash value for the sequence (modified) + */ + void adjustHash(StateType v, size_t& hash) const; + void adjustHash(bool v, size_t& hash) const; + /** Quit if some sequences contain only gaps or missing data */ @@ -443,7 +451,7 @@ class Alignment : public vector, public CharSet, public StateSpace { @return TRUE if seq_id contains only gaps or missing characters @param seq_id sequence ID */ - bool isGapOnlySeq(int seq_id); + bool isGapOnlySeq(size_t seq_id); virtual bool isSuperAlignment() { return false; diff --git a/alignment/alignmentpairwise.cpp b/alignment/alignmentpairwise.cpp index 088bf2363..f3ea23bcb 100644 --- a/alignment/alignmentpairwise.cpp +++ b/alignment/alignmentpairwise.cpp @@ -382,7 +382,7 @@ double AlignmentPairwise::optimizeDist(double initial_dist) { return optimizeDist(initial_dist, d2l); } -double AlignmentPairwise::computeDist +double AlignmentPairwise::recomputeDist ( int seq1, int seq2, double initial_dist, double &d2l ) { //Only called when -experimental has been passed if (initial_dist == 0.0) { diff --git a/alignment/alignmentpairwise.h b/alignment/alignmentpairwise.h index 348d72c19..cae1c75b0 100644 --- a/alignment/alignmentpairwise.h +++ b/alignment/alignmentpairwise.h @@ -105,7 +105,7 @@ class AlignmentPairwise : public Alignment, public Optimization @return a new estimate of branch length */ - virtual double computeDist( int seq1, int seq2, double initial_dist, double &d2l ); + virtual double recomputeDist( int seq1, int seq2, double initial_dist, double &d2l ); /** destructor diff --git a/alignment/alignmentsummary.cpp b/alignment/alignmentsummary.cpp index aa2092d00..ae7b0684a 100644 --- a/alignment/alignmentsummary.cpp +++ b/alignment/alignmentsummary.cpp @@ -7,7 +7,6 @@ #include "alignment.h" #include "alignmentsummary.h" -#include AlignmentSummary::AlignmentSummary(const Alignment* a, bool keepConstSites) { alignment = a; @@ -31,18 +30,20 @@ AlignmentSummary::AlignmentSummary(const Alignment* a, bool keepConstSites) { SiteSummary(): isConst(false), frequency(0), minState(0), maxState(0) {} }; - boost::scoped_array siteArray( new SiteSummary[alignment->size()] ); + size_t siteCount = alignment->size(); + std::vector sites; + sites.reserve(siteCount); #ifdef _OPENMP #pragma omp parallel for #endif - for (int site=0; sitesize(); ++site) { //per site - auto itSite = alignment->begin() + site; - SiteSummary &s = *(siteArray.get() + site); + for (size_t site=0; sitebegin() + site; + SiteSummary &s = sites[site]; StateType minStateForSite = (*itSite)[0]; StateType maxStateForSite = minStateForSite; s.isConst = itSite->isConst(); s.frequency = itSite->frequency; - for (int seq=1; seq map = stateToSumOfConstantSiteFrequencies; - for (int i=0; isize(); ++i, ++s) { + for (size_t site=0; sitefrequency; - totalFrequencyOfNonConstSites += s->isConst ? s->frequency : 0; - if ( keepConstSites || !s->isConst) { - if ( 0 < s->frequency && s->minState < s->maxState) { + totalFrequency += s.frequency; + totalFrequencyOfNonConstSites += s.isConst ? 0 : s.frequency; + if ( keepConstSites || !s.isConst) { + if ( 0 < s.frequency && s.minState < s.maxState) { alreadyCounted = true; if (sequenceLength==0) { - minState = s->minState; - maxState = s->maxState; + minState = s.minState; + maxState = s.maxState; } else { - if (s->minState < minState) { - minState = s->minState; + if (s.minState < minState) { + minState = s.minState; } - if (maxState < s->maxState) { - maxState = s->maxState; + if (maxState < s.maxState) { + maxState = s.maxState; } } - siteNumbers.emplace_back(i); - siteFrequencies.emplace_back(s->frequency); - nonConstSiteFrequencies.emplace_back(s->isConst ? 0 : s->frequency); + siteNumbers.emplace_back(site); + siteFrequencies.emplace_back(s.frequency); + nonConstSiteFrequencies.emplace_back(s.isConst ? 0 : s.frequency); ++sequenceLength; } } - if (s->minState == s->maxState && !alreadyCounted) { - if (map.find(s->minState)==map.end()) { - map[s->minState] = s->frequency; + if (s.minState == s.maxState && !alreadyCounted) { + if (map.find(s.minState)==map.end()) { + map[s.minState] = s.frequency; } else { - map[s->minState] += s->frequency; + map[s.minState] += s.frequency; } } } @@ -98,7 +99,7 @@ AlignmentSummary::~AlignmentSummary() { sequenceCount = 0; } -int AlignmentSummary::getSumOfConstantSiteFrequenciesForState(int state) { +size_t AlignmentSummary::getSumOfConstantSiteFrequenciesForState(int state) { auto it = stateToSumOfConstantSiteFrequencies.find(state); return (it == stateToSumOfConstantSiteFrequencies.end()) ? 0 : it->second; } diff --git a/alignment/alignmentsummary.h b/alignment/alignmentsummary.h index 83a6bceb8..4dfc620c4 100644 --- a/alignment/alignmentsummary.h +++ b/alignment/alignmentsummary.h @@ -29,14 +29,14 @@ struct AlignmentSummary std::vector nonConstSiteFrequencies; //ditto, but zeroed if site //isConst according to alignment std::map stateToSumOfConstantSiteFrequencies; - int totalFrequency; //sum of frequencies (*including* constant sites!) - int totalFrequencyOfNonConstSites; //ditto (*excluding* constant sites!) + size_t totalFrequency; //sum of frequencies (*including* constant sites!) + size_t totalFrequencyOfNonConstSites; //ditto (*excluding* constant sites!) StateType minState; //found on any site where there is variation StateType maxState; //ditto char* sequenceMatrix; size_t sequenceLength; //Sequence length size_t sequenceCount; //The number of sequences - int getSumOfConstantSiteFrequenciesForState(int state); + size_t getSumOfConstantSiteFrequenciesForState(int state); bool constructSequenceMatrix(bool treatAllAmbiguousStatesAsUnknown); }; diff --git a/alignment/maalignment.cpp b/alignment/maalignment.cpp index 353f87474..b420dc04a 100644 --- a/alignment/maalignment.cpp +++ b/alignment/maalignment.cpp @@ -88,8 +88,8 @@ IntVector MaAlignment::computeExpectedNorFre() if ( logLL.empty()) outError("Error: log likelihood of patterns are not given!"); - int patNum = getNPattern(); - int alignLen = getNSite(); + size_t patNum = getNPattern(); + size_t alignLen = getNSite(); //resize the expectedNorFre vector expectedNorFre.resize(patNum,-1); @@ -144,13 +144,12 @@ void MaAlignment::printPatObsExpFre(const char *fileName, const IntVector expect out.open(fileName); out << "Pattern\tLogLL\tObservedFre\tExpectedFre" << endl; - int patNum = getNPattern(); - int seqNum = getNSeq(); - int seqID; + size_t patNum = getNPattern(); + size_t seqNum = getNSeq(); - for ( int i = 0; i < patNum; i++ ) + for ( size_t i = 0; i < patNum; ++i ) { - for ( seqID = 0; seqID < seqNum; seqID++ ){ + for ( size_t seqID = 0; seqID < seqNum; ++seqID ){ out << convertStateBackStr(at(i)[seqID]); } out << "\t" << logLL[i] << "\t" << (*this)[i].frequency << "\t" << expectedNorFre[i] << endl; diff --git a/alignment/pattern.cpp b/alignment/pattern.cpp index b135a373b..5921909a5 100644 --- a/alignment/pattern.cpp +++ b/alignment/pattern.cpp @@ -19,7 +19,7 @@ Pattern::Pattern() // is_const = false; // is_informative = false; flag = 0; - const_char = 255; + const_char = -1; num_chars = 0; } @@ -30,7 +30,7 @@ Pattern::Pattern(int nseq, int freq) // is_const = false; // is_informative = false; flag = 0; - const_char = 255; + const_char = -1; num_chars = 0; } diff --git a/alignment/superalignment.cpp b/alignment/superalignment.cpp index c1999f5bf..bd893de92 100644 --- a/alignment/superalignment.cpp +++ b/alignment/superalignment.cpp @@ -24,10 +24,6 @@ #include "nclextra/myreader.h" #include "main/phylotesting.h" #include "utils/timeutil.h" //for getRealTime() -#ifdef USE_BOOST -#include //for boost::hash_combine -#endif - Alignment *createAlignment(string aln_file, const char *sequence_type, InputType intype, string model_name) { bool is_dir = isDirectory(aln_file.c_str()); @@ -154,7 +150,7 @@ void SuperAlignment::init(StrVector *sequence_names) { max_num_states = 0; // first build taxa_index and partitions - int site, seq, nsite = partitions.size(); + size_t nsite = partitions.size(); // BUG FIX 2016-11-29: when merging partitions with -m TESTMERGE, sequence order is changed // get the taxa names from existing tree @@ -165,12 +161,11 @@ void SuperAlignment::init(StrVector *sequence_names) { i->resize(nsite, -1); } - site = 0; - for (auto it = partitions.begin(); it != partitions.end(); it++, site++) { -// partitions.push_back((*it)->aln); - int nseq = (*it)->getNSeq(); + size_t site = 0; + for (auto it = partitions.begin(); it != partitions.end(); ++it, ++site) { + size_t nseq = (*it)->getNSeq(); //cout << "nseq = " << nseq << endl; - for (seq = 0; seq < nseq; seq++) { + for (size_t seq = 0; seq < nseq; ++seq) { int id = getSeqID((*it)->getSeqName(seq)); if (id < 0) { seq_names.push_back((*it)->getSeqName(seq)); @@ -187,8 +182,7 @@ void SuperAlignment::init(StrVector *sequence_names) { } void SuperAlignment::buildPattern() { - int site, seq, nsite = partitions.size(); - + size_t nsite = partitions.size(); seq_type = SEQ_BINARY; num_states = 2; // binary type because the super alignment presents the presence/absence of taxa in the partitions STATE_UNKNOWN = 2; @@ -197,11 +191,11 @@ void SuperAlignment::buildPattern() { pattern_index.clear(); VerboseMode save_mode = verbose_mode; verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern - int nseq = getNSeq(); - for (site = 0; site < nsite; site++) { + size_t nseq = getNSeq(); + for (size_t site = 0; site < nsite; site++) { Pattern pat; pat.resize(nseq, 0); - for (seq = 0; seq < nseq; seq++) + for (size_t seq = 0; seq < nseq; seq++) pat[seq] = (taxa_index[seq][site] >= 0)? 1 : 0; addPattern(pat, site); } @@ -627,8 +621,9 @@ void SuperAlignment::printPartition(ostream &out, const char *aln_file, bool app if (aln_file) out << "[ partition information for alignment written in " << aln_file <<" file ]" << endl; out << "begin sets;" << endl; - int part; int start_site; - for (part = 0, start_site = 1; part < partitions.size(); part++) { + int part; + int start_site = 1; + for (size_t part = 0; part < partitions.size(); ++part) { string name = partitions[part]->name; replace(name.begin(), name.end(), '+', '_'); int end_site = start_site + partitions[part]->getNSite(); @@ -636,7 +631,7 @@ void SuperAlignment::printPartition(ostream &out, const char *aln_file, bool app start_site = end_site; } bool ok_model = true; - for (part = 0; part < partitions.size(); part++) + for (size_t part = 0; part < partitions.size(); ++part) if (partitions[part]->model_name.empty()) { ok_model = false; break; @@ -781,10 +776,10 @@ void SuperAlignment::printBestPartitionRaxml(const char *filename) { void SuperAlignment::linkSubAlignment(int part) { ASSERT(taxa_index.size() == getNSeq()); - int nseq = getNSeq(), seq; + size_t nseq = getNSeq(); vector checked; checked.resize(partitions[part]->getNSeq(), false); - for (seq = 0; seq < nseq; seq++) { + for (size_t seq = 0; seq < nseq; seq++) { int id = partitions[part]->getSeqID(getSeqName(seq)); if (id < 0) taxa_index[seq][part] = -1; @@ -792,12 +787,9 @@ void SuperAlignment::linkSubAlignment(int part) { taxa_index[seq][part] = id; checked[id] = true; } - } - if (verbose_mode >= VB_MED) { - } // sanity check that all seqnames in partition must be present in superalignment - for (seq = 0; seq < checked.size(); seq++) { + for (size_t seq = 0; seq < checked.size(); seq++) { ASSERT(checked[seq]); } } @@ -811,9 +803,7 @@ void SuperAlignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int position_spec = aln->position_spec; aln_file = aln->aln_file; - int i; - IntVector::iterator it; - for (it = seq_id.begin(); it != seq_id.end(); it++) { + for (auto it = seq_id.begin(); it != seq_id.end(); it++) { ASSERT(*it >= 0 && *it < aln->getNSeq()); seq_names.push_back(aln->getSeqName(*it)); } @@ -822,8 +812,9 @@ void SuperAlignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int //Alignment::extractSubAlignment(aln, seq_id, 0); taxa_index.resize(getNSeq()); - for (i = 0; i < getNSeq(); i++) + for (size_t i = 0; i < getNSeq(); ++i) { taxa_index[i].resize(saln->partitions.size(), -1); + } int part = 0; // partitions.resize(saln->partitions.size()); @@ -845,8 +836,9 @@ void SuperAlignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int } if (partitions.size() < saln->partitions.size()) { - for (i = 0; i < getNSeq(); i++) + for (size_t i = 0; i < getNSeq(); ++i) { taxa_index[i].resize(partitions.size()); + } } // now build the patterns based on taxa_index @@ -871,13 +863,13 @@ SuperAlignment *SuperAlignment::extractPartitions(IntVector &part_id) { } } - int i; newaln->taxa_index.resize(newaln->getNSeq()); - for (i = 0; i < newaln->getNSeq(); i++) + for (size_t i = 0; i < newaln->getNSeq(); ++i) { newaln->taxa_index[i].resize(part_id.size(), -1); + } - int part = 0; - for (auto ait = part_id.begin(); ait != part_id.end(); ait++, part++) { + size_t part = 0; + for (auto ait = part_id.begin(); ait != part_id.end(); ++ait, ++part) { newaln->partitions.push_back(partitions[*ait]); newaln->linkSubAlignment(newaln->partitions.size()-1); } @@ -888,11 +880,9 @@ SuperAlignment *SuperAlignment::extractPartitions(IntVector &part_id) { } void SuperAlignment::removePartitions(set &removed_id) { - // remove part_id from partitions vector new_partitions; - int i; - for (i = 0; i < partitions.size(); i++) + for (size_t i = 0; i < partitions.size(); ++i) if (removed_id.find(i) == removed_id.end()) { // not found in the removed set new_partitions.push_back(partitions[i]); @@ -918,9 +908,9 @@ void SuperAlignment::removePartitions(set &removed_id) { // build the taxa_index taxa_index.resize(getNSeq()); - for (i = 0; i < getNSeq(); i++) + for (size_t i = 0; i < getNSeq(); ++i) taxa_index[i].resize(partitions.size(), -1); - for (i = 0; i < partitions.size(); i++) + for (size_t i = 0; i < partitions.size(); ++i) linkSubAlignment(i); // now build the patterns based on taxa_index @@ -948,10 +938,10 @@ Alignment *SuperAlignment::removeIdenticalSeq(string not_remove, bool keep_two, for (auto ait = partitions.begin(); ait != partitions.end(); ait++, part++) { int subseq1 = taxa_index[seq1][part]; bool present = ( 0 < subseq1 ); - boost::hash_combine(hash, present); + adjustHash(present, hash); if (present) { for (iterator it = (*ait)->begin(); it != (*ait)->end(); it++) { - boost::hash_combine(hash, (*it)[subseq1] ); + adjustHash((*it)[subseq1],hash); } } } @@ -967,10 +957,10 @@ Alignment *SuperAlignment::removeIdenticalSeq(string not_remove, bool keep_two, bool listIdentical = !Params::getInstance().suppress_duplicate_sequence_warnings; auto startCheck = getRealTime(); - for (int seq1 = 0; seq1 < getNSeq(); seq1++) { + for (size_t seq1 = 0; seq1 < getNSeq(); ++seq1) { if (checked[seq1]) continue; bool first_ident_seq = true; - for (int seq2 = seq1+1; seq2 < getNSeq(); seq2++) { + for (size_t seq2 = seq1+1; seq2 < getNSeq(); ++seq2) { if (getSeqName(seq2) == not_remove || removed[seq2]) { continue; } if (hashes[seq1]!=hashes[seq2]) { @@ -1005,7 +995,7 @@ Alignment *SuperAlignment::removeIdenticalSeq(string not_remove, bool keep_two, if (!equal_seq) { continue; } - if (removed_seqs.size() < getNSeq()-3 && (!keep_two || !first_ident_seq)) { + if (removed_seqs.size() + 3 < getNSeq() && (!keep_two || !first_ident_seq)) { removed_seqs.push_back(getSeqName(seq2)); target_seqs.push_back(getSeqName(seq1)); removed[seq2] = true; @@ -1027,12 +1017,12 @@ Alignment *SuperAlignment::removeIdenticalSeq(string not_remove, bool keep_two, if (removed_seqs.empty()) return this; // do nothing if the list is empty - if (removed_seqs.size() >= getNSeq()-3) + if (removed_seqs.size() + 3 >= getNSeq()) outWarning("Your alignment contains too many identical sequences!"); // now remove identical sequences IntVector keep_seqs; - for (int seq1 = 0; seq1 < getNSeq(); seq1++) + for (size_t seq1 = 0; seq1 < getNSeq(); ++seq1) { if (!removed[seq1]) keep_seqs.push_back(seq1); } @@ -1044,7 +1034,7 @@ Alignment *SuperAlignment::removeIdenticalSeq(string not_remove, bool keep_two, int SuperAlignment::checkAbsentStates(string msg) { int count = 0; - for (auto it = partitions.begin(); it != partitions.end(); it++) + for (auto it = partitions.begin(); it != partitions.end(); ++it) count += (*it)->checkAbsentStates("partition " + convertIntToString((it-partitions.begin())+1)); return count; } @@ -1415,9 +1405,8 @@ void SuperAlignment::shuffleAlignment() { double SuperAlignment::computeObsDist(int seq1, int seq2) { - int site; int diff_pos = 0, total_pos = 0; - for (site = 0; site < getNSite(); site++) { + for (size_t site = 0; site < getNSite(); ++site) { int id1 = taxa_index[seq1][site]; int id2 = taxa_index[seq2][site]; if (id1 < 0 || id2 < 0) continue; @@ -1629,7 +1618,7 @@ Alignment *SuperAlignment::concatenateAlignments() { SuperAlignment *saln = new SuperAlignment(); saln->max_num_states = 0; // first build taxa_index and partitions - int site, seq, nsite = ids.size(); + size_t nsite = ids.size(); // BUG FIX 2016-11-29: when merging partitions with -m TESTMERGE, sequence order is changed // get the taxa names from existing tree @@ -1639,12 +1628,12 @@ Alignment *SuperAlignment::concatenateAlignments() { for (auto it = saln->taxa_index.begin(); it != saln->taxa_index.end(); it++) it->resize(nsite, -1); - for (site = 0; site != nsite; site++) { + for (size_t site = 0; site != nsite; ++site) { Alignment *part_aln = concatenateAlignments(ids[site]); saln->partitions.push_back(part_aln); - int nseq = part_aln->getNSeq(); + size_t nseq = part_aln->getNSeq(); //cout << "nseq = " << nseq << endl; - for (seq = 0; seq < nseq; seq++) { + for (size_t seq = 0; seq < nseq; ++seq) { int id = saln->getSeqID(part_aln->getSeqName(seq)); ASSERT(id >= 0); saln->taxa_index[id][site] = seq; @@ -1687,17 +1676,17 @@ void SuperAlignment::orderPatternByNumChars(int pat_type) { int maxi = (num_parsimony_sites+UINT_BITS-1)/UINT_BITS; pars_lower_bound = new UINT[maxi+1]; memset(pars_lower_bound, 0, (maxi+1)*sizeof(UINT)); - int part, nseq = getNSeq(); + size_t nseq = getNSeq(); // compute ordered_pattern ordered_pattern.clear(); // UINT sum_scores[npart]; - for (part = 0; part != partitions.size(); part++) { + for (size_t part = 0; part != partitions.size(); ++part) { partitions[part]->orderPatternByNumChars(pat_type); // partial_partition if (Params::getInstance().partition_type == TOPO_UNLINKED) continue; - for (vector::iterator pit = partitions[part]->ordered_pattern.begin(); pit != partitions[part]->ordered_pattern.end(); pit++) { + for (auto pit = partitions[part]->ordered_pattern.begin(); pit != partitions[part]->ordered_pattern.end(); ++pit) { Pattern pattern(*pit); pattern.resize(nseq); // maximal unknown states for (int j = 0; j < nseq; j++) diff --git a/alignment/superalignmentunlinked.cpp b/alignment/superalignmentunlinked.cpp index d23f28d25..c4a2d215f 100644 --- a/alignment/superalignmentunlinked.cpp +++ b/alignment/superalignmentunlinked.cpp @@ -96,7 +96,7 @@ void SuperAlignmentUnlinked::buildPattern() { /* VerboseMode save_mode = verbose_mode; verbose_mode = min(verbose_mode, VB_MIN); // to avoid printing gappy sites in addPattern - int nseq = getNSeq(); + size_t nseq = getNSeq(); int start_seq = 0; resize(npart, Pattern(nseq)); for (part = 0; part < npart; part++) { diff --git a/booster/booster.c b/booster/booster.c index b69ddef0c..91b1c8d1b 100644 --- a/booster/booster.c +++ b/booster/booster.c @@ -168,6 +168,9 @@ int main_booster (const char* input_tree, const char *boot_trees, /* If true, compute and print in the log file the (normalized) number of moves of each taxa for all branches */ int count_per_branch = 0; + + opterr = 0; + /* static struct option long_options[] = { {"input", required_argument, 0, 'i'}, {"boot" , required_argument, 0, 'b'}, @@ -184,10 +187,8 @@ int main_booster (const char* input_tree, const char *boot_trees, {0, 0, 0, 0} }; - opterr = 0; int option_index = 0; int c = 0; - /* while ((c = getopt_long(argc, argv, "i:a:b:d:o:cs:@:S:n:r:hvq", long_options, &option_index)) != -1){ switch (c){ case 'i': input_tree = optarg; break; diff --git a/booster/tree.c b/booster/tree.c index 4f41149e4..36677fc40 100644 --- a/booster/tree.c +++ b/booster/tree.c @@ -1059,7 +1059,7 @@ void unrooted_to_rooted(Tree* t) { /* utility functions to deal with NH files */ -unsigned int tell_size_of_one_tree(char* filename) { +unsigned int tell_size_of_one_tree(const char* filename) { /* the only purpose of this is to know about the size of a treefile (NH format) in order to save memspace in allocating the string later on */ /* wew open and close this file independently of any other fopen */ unsigned int mysize = 0; diff --git a/booster/tree.h b/booster/tree.h index 5ace7f0b4..8ffedde51 100644 --- a/booster/tree.h +++ b/booster/tree.h @@ -188,7 +188,7 @@ void reorient_edges(Tree *t); void reorient_edges_recur(Node *n, Node *prev, Edge *e); /* utility functions to deal with NH files */ -unsigned int tell_size_of_one_tree(char* filename); +unsigned int tell_size_of_one_tree(const char* filename); int copy_nh_stream_into_str(FILE* nh_stream, char* big_string); /* actually parsing a tree */ diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index 439503353..18121f132 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -521,7 +521,7 @@ void reportRate(ostream &out, PhyloTree &tree) { } void reportTree(ofstream &out, Params ¶ms, PhyloTree &tree, double tree_lh, double lh_variance, double main_tree) { - int ssize = tree.getAlnNSite(); + size_t ssize = tree.getAlnNSite(); double epsilon = 1.0 / ssize; double totalLen = tree.treeLength(); int df = tree.getModelFactory()->getNParameters(BRLEN_OPTIMIZE); @@ -1474,17 +1474,18 @@ void reportPhyloAnalysis(Params ¶ms, IQTree &tree, ModelCheckpoint &model_in } void checkZeroDist(Alignment *aln, double *dist) { - int ntaxa = aln->getNSeq(); + size_t ntaxa = aln->getNSeq(); IntVector checked; checked.resize(ntaxa, 0); - int i, j; - for (i = 0; i < ntaxa - 1; i++) { + auto minLen = Params::getInstance().min_branch_length; + for (size_t i = 0; i < ntaxa - 1; ++i) { if (checked[i]) continue; string str = ""; bool first = true; - for (j = i + 1; j < ntaxa; j++) - if (dist[i * ntaxa + j] <= Params::getInstance().min_branch_length) { + auto distRow = dist + i*ntaxa; + for (size_t j = i + 1; j < ntaxa; ++j) + if (distRow[j] <= minLen ) { if (first) str = "ZERO distance between sequences " + aln->getSeqName(i); @@ -1493,8 +1494,9 @@ void checkZeroDist(Alignment *aln, double *dist) { first = false; } checked[i] = 1; - if (str != "") + if (str != "") { outWarning(str); + } } } @@ -1636,8 +1638,8 @@ void computeMLDist ( Params& params, IQTree& iqtree cout << "Computing ML distances took " << (getRealTime() - begin_wallclock_time) << " sec (of wall-clock time) " << (getCPUTime() - begin_cpu_time) << " sec(of CPU time)" << endl; - int n = iqtree.aln->getNSeq(); - int nSquared = n*n; + size_t n = iqtree.aln->getNSeq(); + size_t nSquared = n*n; if ( iqtree.dist_matrix == nullptr ) { iqtree.dist_matrix = ml_dist; ml_dist = nullptr; diff --git a/main/phylotesting.cpp b/main/phylotesting.cpp index 901bc7700..c384aee65 100644 --- a/main/phylotesting.cpp +++ b/main/phylotesting.cpp @@ -1838,7 +1838,7 @@ double doKmeansClustering(Params ¶ms, PhyloSuperTree *in_tree, } } - int ssize = in_tree->getAlnNSite(); + size_t ssize = in_tree->getAlnNSite(); double score = computeInformationScore(lhsum, dfsum, ssize, params.model_test_criterion); cout << "k-means score for " << ncluster << " partitions: " << score << " (LnL: " << lhsum << " " << "df: " << dfsum << ")" << endl; @@ -1925,7 +1925,7 @@ void testPartitionModel(Params ¶ms, PhyloSuperTree* in_tree, ModelCheckpoint if (params.partition_type == BRLEN_SCALE) dfsum -= 1; } - int ssize = in_tree->getAlnNSite(); + size_t ssize = in_tree->getAlnNSite(); int64_t num_model = 0; int64_t total_num_model = in_tree->size(); diff --git a/main/treetesting.cpp b/main/treetesting.cpp index 8d1ac82f5..777cf2ffe 100644 --- a/main/treetesting.cpp +++ b/main/treetesting.cpp @@ -21,7 +21,6 @@ void printSiteLh(const char*filename, PhyloTree *tree, double *ptn_lh, bool append, const char *linename) { - int i; double *pattern_lh; if (!ptn_lh) { pattern_lh = new double[tree->getAlnNPattern()]; @@ -46,7 +45,7 @@ void printSiteLh(const char*filename, PhyloTree *tree, double *ptn_lh, out.width(10); out << left << linename; } - for (i = 0; i < tree->getAlnNSite(); i++) + for (size_t i = 0; i < tree->getAlnNSite(); i++) out << " " << pattern_lh[pattern_index[i]]; out << endl; out.close(); @@ -337,8 +336,8 @@ void printSiteProbCategory(const char*filename, PhyloTree *tree, SiteLoglType ws for (PhyloSuperTree::iterator it = super_tree->begin(); it != super_tree->end(); it++) { size_t part_ncat = (*it)->getNumLhCat(wsl); (*it)->aln->getSitePatternIndex(pattern_index); - size_t site, nsite = (*it)->aln->getNSite(); - for (site = 0; site < nsite; site++) { + size_t nsite = (*it)->aln->getNSite(); + for (size_t site = 0; site < nsite; ++site) { out << (it-super_tree->begin())+1 << "\t" << site+1; double *prob_cat = ptn_prob_cat + (offset+pattern_index[site]*part_ncat); for (cat = 0; cat < part_ncat; cat++) @@ -349,8 +348,8 @@ void printSiteProbCategory(const char*filename, PhyloTree *tree, SiteLoglType ws } } else { tree->aln->getSitePatternIndex(pattern_index); - int nsite = tree->getAlnNSite(); - for (int site = 0; site < nsite; site++) { + size_t nsite = tree->getAlnNSite(); + for (size_t site = 0; site < nsite; ++site) { out << site+1; double *prob_cat = ptn_prob_cat + pattern_index[site]*ncat; for (cat = 0; cat < ncat; cat++) { @@ -369,8 +368,8 @@ void printSiteProbCategory(const char*filename, PhyloTree *tree, SiteLoglType ws void printSiteStateFreq(const char*filename, PhyloTree *tree, double *state_freqs) { - - int i, j, nsites = tree->getAlnNSite(), nstates = tree->aln->num_states; + size_t nsites = tree->getAlnNSite(); + size_t nstates = tree->aln->num_states; double *ptn_state_freq; if (state_freqs) { ptn_state_freq = state_freqs; @@ -385,11 +384,11 @@ void printSiteStateFreq(const char*filename, PhyloTree *tree, double *state_freq out.open(filename); IntVector pattern_index; tree->aln->getSitePatternIndex(pattern_index); - for (i = 0; i < nsites; i++) { + for (size_t i = 0; i < nsites; ++i) { out.width(6); out << left << i+1 << " "; double *state_freq = &ptn_state_freq[pattern_index[i]*nstates]; - for (j = 0; j < nstates; j++) { + for (size_t j = 0; j < nstates; ++j) { out.width(15); out << state_freq[j] << " "; } @@ -407,18 +406,19 @@ void printSiteStateFreq(const char*filename, PhyloTree *tree, double *state_freq void printSiteStateFreq(const char* filename, Alignment *aln) { if (aln->site_state_freq.empty()) return; - int i, j, nsites = aln->getNSite(), nstates = aln->num_states; + size_t nsites = aln->getNSite(); + int nstates = aln->num_states; try { ofstream out; out.exceptions(ios::failbit | ios::badbit); out.open(filename); IntVector pattern_index; aln->getSitePatternIndex(pattern_index); - for (i = 0; i < nsites; i++) { + for (size_t i = 0; i < nsites; ++i) { out.width(6); out << left << i+1 << " "; double *state_freq = aln->site_state_freq[pattern_index[i]]; - for (j = 0; j < nstates; j++) { + for (size_t j = 0; j < nstates; ++j) { out.width(15); out << state_freq[j] << " "; } @@ -805,7 +805,7 @@ void performAUTest(Params ¶ms, PhyloTree *tree, double *pattern_lhs, vector< #ifdef _OPENMP #pragma omp for schedule(dynamic) #endif - for (k = 0; k < nscales; k++) { + for (int k = 0; k < nscales; ++k) { string str = "SCALE=" + convertDoubleToString(r[k]); for (boot = 0; boot < nboot; boot++) { if (r[k] == 1.0 && boot == 0) @@ -1073,7 +1073,6 @@ void evaluateTrees(string treeset_file, Params ¶ms, IQTree *tree, vector 10000) +#pragma omp parallel if(nptn > 10000) { int *rstream; init_random(params.ran_seed + omp_get_thread_num(), false, &rstream); @@ -1105,7 +1104,7 @@ void evaluateTrees(string treeset_file, Params ¶ms, IQTree *tree, vectoraln->getPatternFreq(boot_samples + (boot*nptn)); else @@ -1211,7 +1210,7 @@ void evaluateTrees(string treeset_file, Params ¶ms, IQTree *tree, vectorgetCurScore(); double *tree_lhs_offset = tree_lhs + (tid*params.topotest_replicates); - for (boot = 0; boot < params.topotest_replicates; boot++) { + for (size_t boot = 0; boot < params.topotest_replicates; boot++) { double lh = 0.0; int *this_boot_sample = boot_samples + (boot*nptn); for (size_t ptn = 0; ptn < nptn; ptn++) @@ -1235,11 +1234,11 @@ void evaluateTrees(string treeset_file, Params ¶ms, IQTree *tree, vector maxL[boot] + params.ufboot_epsilon) { maxL[boot] = tree_lhs_offset[boot]; maxtid[boot] = tid; @@ -1251,7 +1250,7 @@ void evaluateTrees(string treeset_file, Params ¶ms, IQTree *tree, vector orig_diff) info[tid].sh_pvalue += 1.0; //double max_kh_here = max(max_kh[boot]-avg_lh[max_id], tree_lhs_offset[boot]-avg_lh[tid]); @@ -1363,7 +1362,7 @@ void evaluateTrees(string treeset_file, Params ¶ms, IQTree *tree, vectorinit((params.freq_type != FREQ_UNKNOWN) ? params.freq_type : FREQ_EMPIRICAL); - int i; models->pattern_model_map.resize(tree->aln->getNPattern(), -1); - for (i = 0; i < tree->aln->getNSite(); i++) { + for (size_t i = 0; i < tree->aln->getNSite(); ++i) { models->pattern_model_map[tree->aln->getPatternID(i)] = tree->aln->site_model[i]; //cout << "site " << i << " ptn " << tree->aln->getPatternID(i) << " -> model " << site_model[i] << endl; } double *state_freq = new double[model->num_states]; double *rates = new double[model->getNumRateEntries()]; - for (i = 0; i < tree->aln->site_state_freq.size(); i++) { + for (size_t i = 0; i < tree->aln->site_state_freq.size(); ++i) { ModelMarkov *modeli; if (i == 0) { modeli = (ModelMarkov*)createModel(model_str, models_block, (params.freq_type != FREQ_UNKNOWN) ? params.freq_type : FREQ_EMPIRICAL, "", tree); diff --git a/model/modelmarkov.cpp b/model/modelmarkov.cpp index 77dd18343..8c263110d 100644 --- a/model/modelmarkov.cpp +++ b/model/modelmarkov.cpp @@ -563,8 +563,6 @@ double ModelMarkov::computeTrans(double time, int state1, int state2, double &de void ModelMarkov::computeTransDerv(double time, double *trans_matrix, double *trans_derv1, double *trans_derv2, int mixture) { - int i, j, k; - if (!is_reversible) { computeTransMatrix(time, trans_matrix); // First derivative = Q * e^(Qt) @@ -580,19 +578,19 @@ void ModelMarkov::computeTransDerv(double time, double *trans_matrix, derv2_mat = prod; /* - for (i = 0; i < num_states; i++) - for (j = 0; j < num_states; j++) { + for (int i = 0; i < num_states; i++) + for (int j = 0; j < num_states; j++) { double val = 0.0; - for (k = 0; k < num_states; k++) + for (int k = 0; k < num_states; k++) val += rate_matrix[i*num_states+k] * trans_matrix[k*num_states+j]; trans_derv1[i*num_states+j] = val; } // Second derivative = Q * Q * e^(Qt) - for (i = 0; i < num_states; i++) - for (j = 0; j < num_states; j++) { + for (int i = 0; i < num_states; i++) + for (int j = 0; j < num_states; j++) { double val = 0.0; - for (k = 0; k < num_states; k++) + for (int k = 0; k < num_states; k++) val += rate_matrix[i*num_states+k] * trans_derv1[k*num_states+j]; trans_derv2[i*num_states+j] = val; } diff --git a/model/partitionmodelplen.cpp b/model/partitionmodelplen.cpp index 222e9b2f9..967efca07 100644 --- a/model/partitionmodelplen.cpp +++ b/model/partitionmodelplen.cpp @@ -214,23 +214,24 @@ double PartitionModelPlen::optimizeGeneRate(double gradient_epsilon) { PhyloSuperTreePlen *tree = (PhyloSuperTreePlen*)site_rate->getTree(); // BQM 22-05-2015: change to optimize individual rates - int i; double score = 0.0; - double nsites = tree->getAlnNSite(); + size_t nsites = tree->getAlnNSite(); vector brlen; brlen.resize(tree->branchNum); tree->getBranchLengths(brlen); double max_brlen = 0.0; - for (i = 0; i < brlen.size(); i++) - for (int j = 0; j < brlen[i].size(); j++) - if (brlen[i][j] > max_brlen) + for (size_t i = 0; i < brlen.size(); ++i) { + for (size_t j = 0; j < brlen[i].size(); ++j) { + if (brlen[i][j] > max_brlen) { max_brlen = brlen[i][j]; - + } + } + } if (tree->part_order.empty()) tree->computePartitionOrder(); #ifdef _OPENMP -#pragma omp parallel for reduction(+: score) private(i) schedule(dynamic) if(tree->num_threads > 1) +#pragma omp parallel for reduction(+: score) schedule(dynamic) if(tree->num_threads > 1) #endif for (int j = 0; j < tree->size(); j++) { int i = tree->part_order[j]; @@ -246,7 +247,7 @@ double PartitionModelPlen::optimizeGeneRate(double gradient_epsilon) // now normalize the rates double sum = 0.0; size_t nsite = 0; - for (i = 0; i < tree->size(); i++) { + for (size_t i = 0; i < tree->size(); ++i) { sum += tree->part_info[i].part_rate * tree->at(i)->aln->getNSite(); if (tree->at(i)->aln->seq_type == SEQ_CODON && tree->rescale_codon_brlen) nsite += 3*tree->at(i)->aln->getNSite(); @@ -262,7 +263,7 @@ double PartitionModelPlen::optimizeGeneRate(double gradient_epsilon) } tree->scaleLength(sum); sum = 1.0/sum; - for (i = 0; i < tree->size(); i++) + for (size_t i = 0; i < tree->size(); ++i) tree->part_info[i].part_rate *= sum; return score; } diff --git a/model/ratemeyerdiscrete.cpp b/model/ratemeyerdiscrete.cpp index 24b6249d1..320aff350 100644 --- a/model/ratemeyerdiscrete.cpp +++ b/model/ratemeyerdiscrete.cpp @@ -315,7 +315,6 @@ void RateMeyerDiscrete::computeFuncDerv(double value, double &df, double &ddf) { // double lh = 0.0; int nseq = phylo_tree->leafNum; int nstate = phylo_tree->getModel()->num_states; - int i, j, k, state1, state2; ModelSubst *model = phylo_tree->getModel(); int trans_size = nstate * nstate; double *trans_mat = new double[trans_size]; @@ -325,27 +324,31 @@ void RateMeyerDiscrete::computeFuncDerv(double value, double &df, double &ddf) { int *pair_freq = new int[trans_size]; - for (i = 0; i < nseq-1; i++) - for (j = i+1; j < nseq; j++) { + for (size_t i = 0; i + 1 < nseq; ++i) + for (size_t j = i+1; j < nseq; ++j) { memset(pair_freq, 0, trans_size * sizeof(int)); - for (k = 0; k < size(); k++) { + for (size_t k = 0; k < size(); ++k) { if (ptn_cat[k] != optimizing_cat) continue; Pattern *pat = & phylo_tree->aln->at(k); - if ((state1 = pat->at(i)) < nstate && (state2 = pat->at(j)) < nstate) + int state1 = pat->at(i); + int state2 = pat->at(j); + if (state1 < nstate && state2 < nstate) pair_freq[state1*nstate + state2] += pat->frequency; } double dist = dist_mat[i*nseq + j]; double derv1 = 0.0, derv2 = 0.0; model->computeTransDerv(value * dist, trans_mat, trans_derv1, trans_derv2); - for (k = 0; k < trans_size; k++) if (pair_freq[k]) { - double t1 = trans_derv1[k] / trans_mat[k]; - double t2 = trans_derv2[k] / trans_mat[k]; - trans_derv1[k] = t1; - trans_derv2[k] = (t2 - t1*t1); -// lh -= log(trans_mat[k]) * pair_freq[k]; - derv1 += trans_derv1[k] * pair_freq[k]; - derv2 += trans_derv2[k] * pair_freq[k]; - } + for (size_t k = 0; k < trans_size; ++k) { + if (pair_freq[k]) { + double t1 = trans_derv1[k] / trans_mat[k]; + double t2 = trans_derv2[k] / trans_mat[k]; + trans_derv1[k] = t1; + trans_derv2[k] = (t2 - t1*t1); + //lh -= log(trans_mat[k]) * pair_freq[k]; + derv1 += trans_derv1[k] * pair_freq[k]; + derv2 += trans_derv2[k] * pair_freq[k]; + } + } df -= derv1 * dist; ddf -= derv2 * dist * dist; } diff --git a/model/ratemeyerhaeseler.cpp b/model/ratemeyerhaeseler.cpp index 38710bd3c..15411ebf3 100644 --- a/model/ratemeyerhaeseler.cpp +++ b/model/ratemeyerhaeseler.cpp @@ -55,16 +55,16 @@ void RateMeyerHaeseler::readRateFile(char *rate_file) { in.exceptions(ios::failbit | ios::badbit); in.open(rate_file); char line[256]; - int site, i; + int site; double rate; - int nsites = phylo_tree->aln->getNSite(); + size_t nsites = phylo_tree->aln->getNSite(); resize(phylo_tree->aln->getNPattern(), -1.0); int saturated_sites = 0, saturated_ptn = 0; in.getline(line, sizeof(line)); //if (strncmp(line, "Site", 4) != 0) throw "Wrong header line"; - for (i = 0; i < nsites; i++) { + for (size_t i = 0; i < nsites; ++i) { in.getline(line, sizeof(line)); stringstream ss(line); string tmp; @@ -89,7 +89,7 @@ void RateMeyerHaeseler::readRateFile(char *rate_file) { in.exceptions(ios::failbit | ios::badbit); in.close(); - for (i = 0; i < size(); i++) + for (size_t i = 0; i < size(); ++i) if (at(i) < 0.0) throw "Some site has no rate information"; if (saturated_sites) { @@ -154,7 +154,7 @@ void RateMeyerHaeseler::setRates(DoubleVector &rates) { void RateMeyerHaeseler::initializeRates() { - int i, j, rate_id = 0, state1, state2; + int rate_id = 0; int nseq = phylo_tree->leafNum; int nstate = phylo_tree->getModel()->num_states; @@ -165,13 +165,22 @@ void RateMeyerHaeseler::initializeRates() { for (Alignment::iterator pat = phylo_tree->aln->begin(); pat != phylo_tree->aln->end(); pat++, rate_id++) { int diff = 0, total = 0; - for (i = 0; i < nseq-1; i++) if ((state1 = pat->at(i)) < nstate) - for (j = i+1; j < nseq; j++) if ((state2 = pat->at(j)) < nstate) { - //total += dist_mat[state1 * nstate + state2]; - //if (state1 != state2) diff += dist_mat[state1 * nstate + state2]; - total++; - if (state1 != state2) diff++; - } + for (size_t i = 0; i+1 < nseq; ++i) { + int state1 = pat->at(i); + if (state1 < nstate) { + for (size_t j = i+1; j < nseq; ++j) { + int state2 = pat->at(j); + if (state2 < nstate) { + //total += dist_mat[state1 * nstate + state2]; + //if (state1 != state2) diff += dist_mat[state1 * nstate + state2]; + total++; + if (state1 != state2) { + diff++; + } + } + } + } + } if (diff == 0) diff = 1; if (total == 0) total = 1; double obs_diff = double(diff) / total; @@ -303,7 +312,7 @@ double RateMeyerHaeseler::optimizeRate(int pattern) { void RateMeyerHaeseler::optimizeRates() { if (!dist_mat) { - dist_mat = new double[phylo_tree->leafNum * phylo_tree->leafNum]; + dist_mat = new double[(size_t)phylo_tree->leafNum * (size_t)phylo_tree->leafNum]; } // compute the distance based on the path lengths between taxa of the tree phylo_tree->calcDist(dist_mat); @@ -406,42 +415,54 @@ double RateMeyerHaeseler::computeFunction(double value) { } int nseq = phylo_tree->leafNum; int nstate = phylo_tree->getModel()->num_states; - int i, j, state1, state2; double lh = 0.0; ModelSubst *model = phylo_tree->getModel(); Pattern *pat = & phylo_tree->aln->at(optimizing_pattern); - - for (i = 0; i < nseq-1; i++) if ((state1 = pat->at(i)) < nstate) - for (j = i+1; j < nseq; j++) if ((state2 = pat->at(j)) < nstate) - lh -= log(model->computeTrans(value * dist_mat[i*nseq + j], state1, state2)); + + for (size_t i = 0; i < nseq-1; ++i) { + int state1 = pat->at(i); + if (state1 < nstate) { + for (size_t j = i+1; j < nseq; ++j) { + int state2 = pat->at(j); + if (state2 < nstate) { + lh -= log(model->computeTrans(value * dist_mat[i*nseq + j], state1, state2)); + } + } + } + } return lh; } void RateMeyerHaeseler::computeFuncDerv(double value, double &df, double &ddf) { int nseq = phylo_tree->leafNum; int nstate = phylo_tree->getModel()->num_states; - int i, j, state1, state2; -// double lh = 0.0; + // double lh = 0.0; double trans, derv1, derv2; ModelSubst *model = phylo_tree->getModel(); Pattern *pat = & phylo_tree->aln->at(optimizing_pattern); df = ddf = 0.0; - for (i = 0; i < nseq-1; i++) if ((state1 = pat->at(i)) < nstate) - for (j = i+1; j < nseq; j++) if ((state2 = pat->at(j)) < nstate) { - double dist = dist_mat[i*nseq + j]; - trans = model->computeTrans(value * dist, state1, state2, derv1, derv2); -// lh -= log(trans); - double t1 = derv1 / trans; - double t2 = derv2 / trans; - df -= t1 * dist; - ddf -= dist * dist * (t2 - t1*t1); - } -// return lh; + for (size_t i = 0; i + 1 < nseq; ++i) { + int state1 = pat->at(i); + if (state1 < nstate) { + for (size_t j = i+1; j < nseq; ++j) { + int state2 = pat->at(j); + if (state2 < nstate) { + double dist = dist_mat[i*nseq + j]; + trans = model->computeTrans(value * dist, state1, state2, derv1, derv2); + // lh -= log(trans); + double t1 = derv1 / trans; + double t2 = derv2 / trans; + df -= t1 * dist; + ddf -= dist * dist * (t2 - t1*t1); + } + } + } + } + // return lh; } void RateMeyerHaeseler::runIterativeProc(Params ¶ms, IQTree &tree) { - int i; if (verbose_mode >= VB_MED) { ofstream out("x"); out.close(); @@ -452,13 +473,14 @@ void RateMeyerHaeseler::runIterativeProc(Params ¶ms, IQTree &tree) { IntVector pattern_cat; backup_rate->computePatternRates(*this, pattern_cat); double sum = 0.0; - for (i = 0; i < size(); i++) + for (size_t i = 0; i < size(); i++) { sum += at(i) * phylo_tree->aln->at(i).frequency; + } sum /= phylo_tree->aln->getNSite(); if (fabs(sum - 1.0) > 0.0001) { if (verbose_mode >= VB_MED) cout << "Normalizing Gamma rates (" << sum << ")" << endl; - for (i = 0; i < size(); i++) + for (size_t i = 0; i < size(); ++i) at(i) /= sum; } } @@ -476,6 +498,7 @@ void RateMeyerHaeseler::runIterativeProc(Params ¶ms, IQTree &tree) { dist_file += ".tdist"; tree.getModelFactory()->stopStoringTransMatrix(); + int i=2; for (i = 2; i < 100; i++) { //DoubleVector prev_rates; //getRates(prev_rates); diff --git a/ncl/nxscharactersblock.cpp b/ncl/nxscharactersblock.cpp index 4c903825f..10e534d37 100644 --- a/ncl/nxscharactersblock.cpp +++ b/ncl/nxscharactersblock.cpp @@ -1778,7 +1778,8 @@ unsigned NxsCharactersBlock::HandleTokenState( if (respectingCase) cit = find(ci_begin, ci_end, t); else - cit = find_if (ci_begin, ci_end, bind2nd(NxsStringEqual(), t)); + cit = find_if (ci_begin, ci_end, + [=] (const NxsString& s) { return NxsStringEqual()(s, t); } ); if (cit == ci_end) { diff --git a/pll/makenewzGenericSpecial.c b/pll/makenewzGenericSpecial.c index b2b114a8e..50be0fe3e 100644 --- a/pll/makenewzGenericSpecial.c +++ b/pll/makenewzGenericSpecial.c @@ -64,7 +64,7 @@ /* pointers to reduction buffers for storing and gathering the first and second derivative of the likelihood in Pthreads and MPI */ -#if IS_PARALLEL +#ifdef IS_PARALLEL void branchLength_parallelReduce(pllInstance *tr, double *dlnLdlz, double *d2lnLdlz2, int numBranches ) ; //extern double *globalResult; #endif diff --git a/pll/utils.c b/pll/utils.c index 78f19514a..a0aff8a9e 100644 --- a/pll/utils.c +++ b/pll/utils.c @@ -431,8 +431,6 @@ void hookupFull (nodeptr p, nodeptr q, double *z) /* connect node p with q and assign the default branch lengths */ void hookupDefault (nodeptr p, nodeptr q) { - int i; - p->back = q; q->back = p; diff --git a/sprng/lcg64.c b/sprng/lcg64.c index d73efbfe4..63b5ebdf3 100755 --- a/sprng/lcg64.c +++ b/sprng/lcg64.c @@ -596,7 +596,7 @@ int *igen; { struct rngen *gen; - printf("\n%s\n", GENTYPE+2); + printf("\n%s\n", &GENTYPE[2]); gen = (struct rngen *) igen; printf("\n \tseed = %d, stream_number = %d\tparameter = %d\n\n", gen->init_seed, gen->stream_number, gen->parameter); diff --git a/tree/discordance.cpp b/tree/discordance.cpp index 069bfced9..8f0f7acba 100644 --- a/tree/discordance.cpp +++ b/tree/discordance.cpp @@ -166,7 +166,6 @@ void PhyloTree::computeSiteConcordance(Branch &branch, int nquartets, int *rstre double sN = 0.0; size_t sum_sites = 0; double sCF_N = 0, sDF1_N = 0, sDF2_N = 0; - int i; vector support; support.resize(3, 0); // reserve size for partition-wise concordant/discordant sites @@ -176,26 +175,25 @@ void PhyloTree::computeSiteConcordance(Branch &branch, int nquartets, int *rstre // check for gene trees not decisive for this branch StrVector taxname; getTaxaName(taxname); - int part = 0; + size_t part = 0; for (auto part_aln = saln->partitions.begin(); part_aln != saln->partitions.end(); part_aln++, part++) { // get the taxa names of the partition tree StringIntMap name_map; - for (i = 0; i < (*part_aln)->getNSeq(); i++) + for (size_t i = 0; i < (*part_aln)->getNSeq(); i++) name_map[(*part_aln)->getSeqName(i)] = i; // check that at least one taxon from each subtree is present in partition tree - vector::iterator it; int left_count = 0, right_count = 0; - for (it = left_taxa.begin(); it != left_taxa.end(); it++) { - for (auto it2 = it->begin(); it2 != it->end(); it2++) { + for (auto it = left_taxa.begin(); it != left_taxa.end(); ++it) { + for (auto it2 = it->begin(); it2 != it->end(); ++it2) { if (name_map.find(taxname[*it2]) != name_map.end()) { left_count++; break; } } } - for (it = right_taxa.begin(); it != right_taxa.end(); it++) { - for (auto it2 = it->begin(); it2 != it->end(); it2++) { + for (auto it = right_taxa.begin(); it != right_taxa.end(); ++it) { + for (auto it2 = it->begin(); it2 != it->end(); ++it2) { if (name_map.find(taxname[*it2]) != name_map.end()) { right_count++; break; @@ -210,7 +208,7 @@ void PhyloTree::computeSiteConcordance(Branch &branch, int nquartets, int *rstre } } Neighbor *nei = branch.second->findNeighbor(branch.first); - for (i = 0; i < nquartets; i++) { + for (size_t i = 0; i < nquartets; ++i) { // get a random quartet IntVector quartet; quartet.resize(4); @@ -278,7 +276,7 @@ void PhyloTree::computeSiteConcordance(Branch &branch, int nquartets, int *rstre nei->putAttr("sCF_N/sDF1_N/sDF2_N", s_factors_N.str()); // insert key-value for partition-wise con/discordant sites string keys[] = {"sC", "sD1", "sD2"}; - for (i = 3; i < support.size(); i++) { + for (size_t i = 3; i < support.size(); ++i) { if (support[i] >= 0) nei->putAttr(keys[i%3] + convertIntToString(i/3), (double)support[i]/nquartets); else diff --git a/tree/iqtree.cpp b/tree/iqtree.cpp index eac9c829e..fdeb58d35 100644 --- a/tree/iqtree.cpp +++ b/tree/iqtree.cpp @@ -535,7 +535,7 @@ void IQTree::createPLLPartition(Params ¶ms, ostream &pllPartitionFileHandle) model = "WAG"; //outError("PLL currently only supports DNA/protein alignments"); } - int nsite = (aln->seq_type == SEQ_CODON) ? getAlnNSite()*3 : getAlnNSite(); + size_t nsite = (aln->seq_type == SEQ_CODON) ? getAlnNSite()*3 : getAlnNSite(); pllPartitionFileHandle << model << ", p1 = " << "1-" << nsite << endl; } } @@ -1107,7 +1107,7 @@ void IQTree::deleteNonCherryLeaves(PhyloNodeVector &del_leaves) { // get the vector of non cherry taxa getNonCherryLeaves(noncherry_taxa, cherry_taxa); root = NULL; - int num_taxa = aln->getNSeq(); + size_t num_taxa = aln->getNSeq(); int num_delete = k_delete; if (num_delete > num_taxa - 4) num_delete = num_taxa - 4; @@ -1185,7 +1185,7 @@ void IQTree::deleteLeaves(PhyloNodeVector &del_leaves) { int IQTree::assessQuartet(Node *leaf0, Node *leaf1, Node *leaf2, Node *del_leaf) { ASSERT(dist_matrix); - int nseq = aln->getNSeq(); + size_t nseq = aln->getNSeq(); //int id0 = leaf0->id, id1 = leaf1->id, id2 = leaf2->id; double dist0 = dist_matrix[leaf0->id * nseq + del_leaf->id] + dist_matrix[leaf1->id * nseq + leaf2->id]; double dist1 = dist_matrix[leaf1->id * nseq + del_leaf->id] + dist_matrix[leaf0->id * nseq + leaf2->id]; @@ -3590,7 +3590,7 @@ void IQTree::saveCurrentTree(double cur_logl) { IntVector pattern_index; aln->getSitePatternIndex(pattern_index); out_sitelh << "Site_Lh "; - for (int i = 0; i < getAlnNSite(); i++) + for (size_t i = 0; i < getAlnNSite(); ++i) out_sitelh << " " << pattern_lh[pattern_index[i]]; out_sitelh << endl; } @@ -3947,7 +3947,7 @@ void IQTree::printIntermediateTree(int brtype) { if (!(brtype & WT_APPEND)) out_sitelh << aln->getNSite() << endl; out_sitelh << "Site_Lh "; - for (int i = 0; i < aln->getNSite(); i++) + for (size_t i = 0; i < aln->getNSite(); ++i) out_sitelh << "\t" << pattern_lh[aln->getPatternID(i)]; out_sitelh << endl; delete[] pattern_lh; diff --git a/tree/phylokernel.h b/tree/phylokernel.h index 332e95cc4..cb80e51db 100644 --- a/tree/phylokernel.h +++ b/tree/phylokernel.h @@ -313,7 +313,7 @@ void PhyloTree::computePartialLikelihoodEigenSIMD(PhyloNeighbor *dad_branch, Phy sum_scale += LOG_SCALING_THRESHOLD* 4 * ptn_freq[ptn]; //sum_scale += log(lh_max) * ptn_freq[ptn]; dad_branch->scale_num[ptn] += 4; - int nsite = aln->getNSite(); + size_t nsite = aln->getNSite(); for (i = 0, x = 0; i < nsite && x < ptn_freq[ptn]; i++) if (aln->getPatternID(i) == ptn) { outWarning((string)"Numerical underflow for site " + convertIntToString(i+1)); @@ -825,7 +825,7 @@ void PhyloTree::computeLikelihoodDervEigenSIMD(PhyloNeighbor *dad_branch, PhyloN prob_const = 1.0 - prob_const; double df_frac = df_const / prob_const; double ddf_frac = ddf_const / prob_const; - int nsites = aln->getNSite(); + size_t nsites = aln->getNSite(); df += nsites * df_frac; ddf += nsites *(ddf_frac + df_frac*df_frac); } @@ -1766,7 +1766,6 @@ int PhyloTree::computeParsimonyBranchFastSIMD(PhyloNeighbor *dad_branch, PhyloNo computePartialParsimonyFastSIMD(dad_branch, dad); if ((node_branch->partial_lh_computed & 2) == 0) computePartialParsimonyFastSIMD(node_branch, node); - int site; int nstates = aln->getMaxNumStates(); // VectorClass score = 0; @@ -1785,9 +1784,9 @@ int PhyloTree::computeParsimonyBranchFastSIMD(PhyloNeighbor *dad_branch, PhyloNo switch (nstates) { case 4: #ifdef _OPENMP - #pragma omp parallel for private (site) reduction(+: score) if(nsites>num_threads*10) + #pragma omp parallel for reduction(+: score) if(nsites>num_threads*10) #endif - for (site = 0; site < nsites; site++) { + for (int site = 0; site < nsites; site++) { size_t offset = entry_size*site; VectorClass *x = (VectorClass*)(dad_branch->partial_pars + offset); VectorClass *y = (VectorClass*)(node_branch->partial_pars + offset); @@ -1804,9 +1803,9 @@ int PhyloTree::computeParsimonyBranchFastSIMD(PhyloNeighbor *dad_branch, PhyloNo break; default: #ifdef _OPENMP - #pragma omp parallel for private (site) reduction(+: score) if(nsites>num_threads*10) + #pragma omp parallel for reduction(+: score) if(nsites>num_threads*10) #endif - for (site = 0; site < nsites; site++) { + for (size_t site = 0; site < nsites; ++site) { size_t offset = entry_size*site; VectorClass *x = (VectorClass*)(dad_branch->partial_pars + offset); VectorClass *y = (VectorClass*)(node_branch->partial_pars + offset); diff --git a/tree/phylokernelnew.h b/tree/phylokernelnew.h index f91a141fd..faed924e7 100644 --- a/tree/phylokernelnew.h +++ b/tree/phylokernelnew.h @@ -43,65 +43,28 @@ using namespace std; template inline void sumVec(VectorClass *A, VectorClass &X, size_t N) { - if (N == 1) { - if (append) - X += A[0]; - else - X = A[0]; - return; - } - - size_t i; - switch (N % 4) { - case 0: { - VectorClass V[4]; - V[0] = A[0]; - V[1] = A[1]; - V[2] = A[2]; - V[3] = A[3]; - for (i = 4; i < N; i+=4) { - V[0] += A[i]; - V[1] += A[i+1]; - V[2] += A[i+2]; - V[3] += A[i+3]; - } - if (append) - X += (V[0] + V[1]) + (V[2] + V[3]); - else - X = (V[0] + V[1]) + (V[2] + V[3]); - break; + if (!append) { + X=0; } - - case 2: { - VectorClass V[2]; - V[0] = A[0]; - V[1] = A[1]; - for (i = 2; i < N; i+=2) { - V[0] += A[i]; - V[1] += A[i+1]; + size_t cruft = (N & 3); + if (cruftgetNSite(); + size_t nsites = aln->getNSite(); *df += nsites * df_frac; *ddf += nsites *(ddf_frac + df_frac*df_frac); } @@ -3129,8 +3092,6 @@ double PhyloTree::computeLikelihoodFromBufferGenericSIMD() size_t block = ncat_mix * nstates; // size_t tip_block = nstates * model->getNMixtures(); - size_t ptn; // for big data size > 4GB memory required - size_t c, i; size_t orig_nptn = aln->size(); size_t max_orig_nptn = ((orig_nptn+VectorClass::size()-1)/VectorClass::size())*VectorClass::size(); size_t nptn = max_orig_nptn+model_factory->unobserved_ptns.size(); @@ -3140,7 +3101,7 @@ double PhyloTree::computeLikelihoodFromBufferGenericSIMD() size_t mix_addr_nstates[ncat_mix], mix_addr[ncat_mix]; size_t denom = (model_factory->fused_mix_rate) ? 1 : ncat; - for (c = 0; c < ncat_mix; c++) { + for (size_t c = 0; c < ncat_mix; ++c) { size_t m = c/denom; mix_addr_nstates[c] = m*nstates; mix_addr[c] = mix_addr_nstates[c]*nstates; @@ -3154,7 +3115,7 @@ double PhyloTree::computeLikelihoodFromBufferGenericSIMD() double cat_prop[ncat]; if (SITE_MODEL) { - for (c = 0; c < ncat; c++) { + for (size_t c = 0; c < ncat; ++c) { cat_length[c] = site_rate->getRate(c) * current_it->length; cat_prop[c] = site_rate->getProp(c); } @@ -3163,25 +3124,25 @@ double PhyloTree::computeLikelihoodFromBufferGenericSIMD() if (nstates % VectorClass::size() == 0) { VectorClass *vc_val0 = (VectorClass*)val0; size_t loop_size = nstates / VectorClass::size(); - for (c = 0; c < ncat_mix; c++) { + for (size_t c = 0; c < ncat_mix; ++c) { size_t m = c/denom; VectorClass *eval_ptr = (VectorClass*)(eval + mix_addr_nstates[c]); size_t mycat = c%ncat; double prop = site_rate->getProp(mycat) * model->getMixtureWeight(m); double len = site_rate->getRate(mycat) * current_it->getLength(mycat); - for (i = 0; i < loop_size; i++) { + for (size_t i = 0; i < loop_size; ++i) { vc_val0[i] = exp(eval_ptr[i] * len) * prop; } vc_val0 += loop_size; } } else { - for (c = 0; c < ncat_mix; c++) { + for (size_t c = 0; c < ncat_mix; ++c) { size_t m = c/denom; double *eval_ptr = eval + mix_addr_nstates[c]; size_t mycat = c%ncat; double prop = site_rate->getProp(mycat) * model->getMixtureWeight(m); size_t addr = c*nstates; - for (i = 0; i < nstates; i++) { + for (size_t i = 0; i < nstates; ++i) { double cof = eval_ptr[i]*site_rate->getRate(mycat); double val = exp(cof*current_it->getLength(mycat)) * prop; val0[addr+i] = val; @@ -3195,20 +3156,20 @@ double PhyloTree::computeLikelihoodFromBufferGenericSIMD() VectorClass all_tree_lh(0.0), all_prob_const(0.0); #ifdef _OPENMP -#pragma omp parallel private(ptn, i, c) num_threads(num_threads) +#pragma omp parallel num_threads(num_threads) { #endif VectorClass vc_tree_lh(0.0), vc_prob_const(0.0); #ifdef _OPENMP #pragma omp for schedule(static) nowait #endif - for (ptn = 0; ptn < nptn; ptn+=VectorClass::size()) { + for (size_t ptn = 0; ptn < nptn; ptn+=VectorClass::size()) { VectorClass lh_ptn(0.0); VectorClass *theta = (VectorClass*)(theta_all + ptn*block); if (SITE_MODEL) { VectorClass *eval_ptr = (VectorClass*)&eval[ptn*nstates]; // lh_ptn.load_a(&ptn_invar[ptn]); - for (c = 0; c < ncat; c++) { + for (size_t c = 0; c < ncat; c++) { VectorClass lh_cat; #ifdef KERNEL_FIX_STATES dotProductExp(eval_ptr, theta, cat_length[c], lh_cat); @@ -3242,7 +3203,7 @@ double PhyloTree::computeLikelihoodFromBufferGenericSIMD() if (horizontal_or(VectorClass().load_a(&buffer_scale_all[ptn]) != 0.0)) { // some entries are rescaled double *lh_ptn_dbl = (double*)&lh_ptn; - for (i = 0; i < VectorClass::size(); i++) + for (size_t i = 0; i < VectorClass::size(); i++) if (buffer_scale_all[ptn+i] != 0.0) lh_ptn_dbl[i] *= SCALING_THRESHOLD; } @@ -3275,7 +3236,7 @@ double PhyloTree::computeLikelihoodFromBufferGenericSIMD() // arbitrarily fix tree_lh if underflown for some sites if (!std::isfinite(tree_lh)) { tree_lh = 0.0; - for (ptn = 0; ptn < orig_nptn; ptn++) { + for (size_t ptn = 0; ptn < orig_nptn; ++ptn) { if (!std::isfinite(_pattern_lh[ptn])) { _pattern_lh[ptn] = LOG_SCALING_THRESHOLD*4; // log(2^(-1024)) } @@ -3289,15 +3250,15 @@ double PhyloTree::computeLikelihoodFromBufferGenericSIMD() size_t step_unobserved_ptns = model_factory->unobserved_ptns.size() / nstates; double *const_lh_next = const_lh + step_unobserved_ptns; for (int step = 1; step < nstates; step++, const_lh_next += step_unobserved_ptns) { - for (ptn = 0; ptn < orig_nptn; ptn+=VectorClass::size()) + for (size_t ptn = 0; ptn < orig_nptn; ptn+=VectorClass::size()) (VectorClass().load_a(&const_lh[ptn]) + VectorClass().load_a(&const_lh_next[ptn])).store_a(&const_lh[ptn]); } // cutoff the last entries if going beyond - for (ptn = orig_nptn; ptn < max_orig_nptn; ptn++) + for (size_t ptn = orig_nptn; ptn < max_orig_nptn; ++ptn) { const_lh[ptn] = 0.0; - + } VectorClass sum_corr = 0.0; - for (ptn = 0; ptn < orig_nptn; ptn+=VectorClass::size()) { + for (size_t ptn = 0; ptn < orig_nptn; ptn+=VectorClass::size()) { VectorClass prob_variant = log(1.0 - VectorClass().load_a(&const_lh[ptn])); (VectorClass().load_a(&_pattern_lh[ptn]) - prob_variant).store_a(&_pattern_lh[ptn]); sum_corr += prob_variant*VectorClass().load_a(&ptn_freq[ptn]); @@ -3319,9 +3280,10 @@ double PhyloTree::computeLikelihoodFromBufferGenericSIMD() // _pattern_lh_cat[ptn] *= inv_const; prob_const = log(1.0 - prob_const); - for (ptn = 0; ptn < orig_nptn; ptn+=VectorClass::size()) + for (size_t ptn = 0; ptn < orig_nptn; ptn+=VectorClass::size()) { (VectorClass().load_a(&_pattern_lh[ptn])-prob_const).store_a(&_pattern_lh[ptn]); // _pattern_lh[ptn] -= prob_const; + } tree_lh -= aln->getNSite()*prob_const; ASSERT(std::isfinite(tree_lh)); } diff --git a/tree/phylokernelnonrev.h b/tree/phylokernelnonrev.h index 0a8faa31f..bbc78abd9 100644 --- a/tree/phylokernelnonrev.h +++ b/tree/phylokernelnonrev.h @@ -1013,7 +1013,7 @@ void PhyloTree::computeNonrevLikelihoodDervGenericSIMD(PhyloNeighbor *dad_branch prob_const = 1.0 - prob_const; double df_frac = df_const / prob_const; double ddf_frac = ddf_const / prob_const; - int nsites = aln->getNSite(); + size_t nsites = aln->getNSite(); *df += nsites * df_frac; *ddf += nsites *(ddf_frac + df_frac*df_frac); } diff --git a/tree/phylosupertree.cpp b/tree/phylosupertree.cpp index 2fba9c41c..fce36d9ca 100644 --- a/tree/phylosupertree.cpp +++ b/tree/phylosupertree.cpp @@ -354,15 +354,15 @@ Node* PhyloSuperTree::newNode(int node_id, int node_name) { return (Node*) (new SuperNode(node_id, node_name)); } -int PhyloSuperTree::getAlnNPattern() { - int num = 0; +size_t PhyloSuperTree::getAlnNPattern() { + size_t num = 0; for (iterator it = begin(); it != end(); it++) num += (*it)->getAlnNPattern(); return num; } -int PhyloSuperTree::getAlnNSite() { - int num = 0; +size_t PhyloSuperTree::getAlnNSite() { + size_t num = 0; for (iterator it = begin(); it != end(); it++) num += (*it)->getAlnNSite(); return num; @@ -1378,14 +1378,15 @@ void PhyloSuperTree::computeMarginalAncestralState(PhyloNeighbor *dad_branch, Ph void PhyloSuperTree::writeMarginalAncestralState(ostream &out, PhyloNode *node, double *ptn_ancestral_prob, int *ptn_ancestral_seq) { int part = 1; - for (auto it = begin(); it != end(); it++, part++) { - size_t site, nsites = (*it)->getAlnNSite(), nstates = (*it)->model->num_states; - for (site = 0; site < nsites; site++) { + for (auto it = begin(); it != end(); ++it, ++part) { + size_t nsites = (*it)->getAlnNSite(); + int nstates = (*it)->model->num_states; + for (size_t site = 0; site < nsites; ++site) { int ptn = (*it)->aln->getPatternID(site); out << node->name << "\t" << part << "\t" << site+1 << "\t"; out << (*it)->aln->convertStateBackStr(ptn_ancestral_seq[ptn]); double *state_prob = ptn_ancestral_prob + ptn*nstates; - for (size_t j = 0; j < nstates; j++) { + for (int j = 0; j < nstates; ++j) { out << "\t" << state_prob[j]; } out << endl; @@ -1410,7 +1411,7 @@ void PhyloSuperTree::endMarginalAncestralState(bool orig_kernel_nonrev, aligned_free(ptn_ancestral_seq); aligned_free(ptn_ancestral_prob); - for (auto it = rbegin(); it != rend(); it++) { + for (auto it = rbegin(); it != rend(); ++it) { aligned_free((*it)->_pattern_lh_cat_state); (*it)->_pattern_lh_cat_state = NULL; } @@ -1418,14 +1419,14 @@ void PhyloSuperTree::endMarginalAncestralState(bool orig_kernel_nonrev, void PhyloSuperTree::writeSiteLh(ostream &out, SiteLoglType wsl, int partid) { int part = 1; - for (auto it = begin(); it != end(); it++, part++) + for (auto it = begin(); it != end(); ++it, ++part) (*it)->writeSiteLh(out, wsl, part); } void PhyloSuperTree::writeSiteRates(ostream &out, bool bayes, int partid) { int part = 1; - for (iterator it = begin(); it != end(); it++, part++) { + for (iterator it = begin(); it != end(); ++it, ++part) { (*it)->writeSiteRates(out, bayes, part); } } diff --git a/tree/phylosupertree.h b/tree/phylosupertree.h index 248361150..2efd1fe1f 100644 --- a/tree/phylosupertree.h +++ b/tree/phylosupertree.h @@ -191,12 +191,12 @@ class PhyloSuperTree : public IQTree, public vector /** * @return number of alignment patterns */ - virtual int getAlnNPattern(); + virtual size_t getAlnNPattern(); /** * @return number of alignment sites */ - virtual int getAlnNSite(); + virtual size_t getAlnNSite(); /** compute the distance between 2 sequences. diff --git a/tree/phylotree.cpp b/tree/phylotree.cpp index f91c5a0b0..6f9e1f6cc 100644 --- a/tree/phylotree.cpp +++ b/tree/phylotree.cpp @@ -376,8 +376,8 @@ void PhyloTree::copyPhyloTreeMixlen(PhyloTree *tree, int mix) { void PhyloTree::setAlignment(Alignment *alignment) { aln = alignment; bool err = false; - int nseq = aln->getNSeq(); - for (int seq = 0; seq < nseq; seq++) { + size_t nseq = aln->getNSeq(); + for (size_t seq = 0; seq < nseq; seq++) { string seq_name = aln->getSeqName(seq); Node *node = findLeafName(seq_name); if (!node) { @@ -1403,30 +1403,31 @@ void PhyloTree::computePatternStateFreq(double *ptn_state_freq) { ASSERT(getModel()->isMixture()); computePatternLhCat(WSL_MIXTURE); double *lh_cat = _pattern_lh_cat; - size_t ptn, nptn = getAlnNPattern(), m, nmixture = getModel()->getNMixtures(); + size_t nptn = getAlnNPattern(); + size_t nmixture = getModel()->getNMixtures(); double *ptn_freq = ptn_state_freq; - size_t state, nstates = aln->num_states; + size_t nstates = aln->num_states; // ModelMixture *models = (ModelMixture*)model; if (params->print_site_state_freq == WSF_POSTERIOR_MEAN) { cout << "Computing posterior mean site frequencies...." << endl; // loop over all site-patterns - for (ptn = 0; ptn < nptn; ptn++) { + for (size_t ptn = 0; ptn < nptn; ++ptn) { // first compute posterior for each mixture component double sum_lh = 0.0; - for (m = 0; m < nmixture; m++) { + for (size_t m = 0; m < nmixture; ++m) { sum_lh += lh_cat[m]; } sum_lh = 1.0/sum_lh; - for (m = 0; m < nmixture; m++) { + for (size_t m = 0; m < nmixture; ++m) { lh_cat[m] *= sum_lh; } // now compute state frequencies - for (state = 0; state < nstates; state++) { + for (size_t state = 0; state < nstates; ++state) { double freq = 0; - for (m = 0; m < nmixture; m++) + for (size_t m = 0; m < nmixture; ++m) freq += model->getMixtureClass(m)->state_freq[state] * lh_cat[m]; ptn_freq[state] = freq; } @@ -1438,11 +1439,10 @@ void PhyloTree::computePatternStateFreq(double *ptn_state_freq) { } else if (params->print_site_state_freq == WSF_POSTERIOR_MAX) { cout << "Computing posterior max site frequencies...." << endl; // loop over all site-patterns - for (ptn = 0; ptn < nptn; ptn++) { - + for (size_t ptn = 0; ptn < nptn; ++ptn) { // first compute posterior for each mixture component size_t max_comp = 0; - for (m = 1; m < nmixture; m++) + for (size_t m = 1; m < nmixture; ++m) if (lh_cat[m] > lh_cat[max_comp]) { max_comp = m; } @@ -1717,9 +1717,8 @@ int PhyloTree::computePatternCategories(IntVector *pattern_ncat) { } double PhyloTree::computeLogLVariance(double *ptn_lh, double tree_lh) { - int i; - int nptn = getAlnNPattern(); - int nsite = getAlnNSite(); + size_t nptn = getAlnNPattern(); + size_t nsite = getAlnNSite(); double *pattern_lh = ptn_lh; if (!ptn_lh) { pattern_lh = new double[nptn]; @@ -1728,12 +1727,12 @@ double PhyloTree::computeLogLVariance(double *ptn_lh, double tree_lh) { IntVector pattern_freq; aln->getPatternFreq(pattern_freq); if (tree_lh == 0.0) { - for (i = 0; i < nptn; i++) + for (size_t i = 0; i < nptn; ++i) tree_lh += pattern_lh[i] * pattern_freq[i]; } double avg_site_lh = tree_lh / nsite; double variance = 0.0; - for (i = 0; i < nptn; i++) { + for (size_t i = 0; i < nptn; ++i) { double diff = (pattern_lh[i] - avg_site_lh); variance += diff * diff * pattern_freq[i]; } @@ -1745,9 +1744,8 @@ double PhyloTree::computeLogLVariance(double *ptn_lh, double tree_lh) { } double PhyloTree::computeLogLDiffVariance(double *pattern_lh_other, double *ptn_lh) { - int i; - int nptn = getAlnNPattern(); - int nsite = getAlnNSite(); + size_t nptn = getAlnNPattern(); + size_t nsite = getAlnNSite(); double *pattern_lh = ptn_lh; if (!ptn_lh) { pattern_lh = new double[nptn]; @@ -1757,11 +1755,11 @@ double PhyloTree::computeLogLDiffVariance(double *pattern_lh_other, double *ptn_ aln->getPatternFreq(pattern_freq); double avg_site_lh_diff = 0.0; - for (i = 0; i < nptn; i++) + for (size_t i = 0; i < nptn; ++i) avg_site_lh_diff += (pattern_lh[i] - pattern_lh_other[i]) * pattern_freq[i]; avg_site_lh_diff /= nsite; double variance = 0.0; - for (i = 0; i < nptn; i++) { + for (size_t i = 0; i < nptn; ++i) { double diff = (pattern_lh[i] - pattern_lh_other[i] - avg_site_lh_diff); variance += diff * diff * pattern_freq[i]; } @@ -2088,7 +2086,7 @@ void PhyloTree::computeAllSubtreeDistForOneNode(PhyloNode* source, PhyloNode* so return; } else if (source->isLeaf() && dad->isLeaf()) { ASSERT(dist_matrix); - int nseq = aln->getNSeq(); + size_t nseq = aln->getNSeq(); if (params->ls_var_type == OLS) { dist = dist_matrix[dad->id * nseq + source->id]; weight = 1.0; @@ -2296,7 +2294,7 @@ void PhyloTree::approxAllBranches(PhyloNode *node, PhyloNode *dad) { if (dad->isLeaf() && node->isLeaf()) { string key = nodePair2String(dad, node); assert(dist_matrix); - int nseq = aln->getNSeq(); + size_t nseq = aln->getNSeq(); double dist = dist_matrix[dad->id * nseq + node->id]; interSubtreeDistances.insert(StringDoubleMap::value_type(key, dist)); } else if (!dad->isLeaf() && node->isLeaf()) { @@ -3036,7 +3034,7 @@ double PhyloTree::addTaxonML(Node *added_node, Node* &target_node, Node* &target void PhyloTree::growTreeML(Alignment *alignment) { cout << "Stepwise addition using ML..." << endl; aln = alignment; - int size = aln->getNSeq(); + size_t size = aln->getNSeq(); if (size < 3) outError(ERR_FEW_TAXA); @@ -3181,22 +3179,25 @@ double PhyloTree::computeDist(int seq1, int seq2, double initial_dist) { } double PhyloTree::correctDist(double *dist_mat) { - int i, j, k, pos; - int n = aln->getNSeq(); - int nsqr = n * n; + size_t n = aln->getNSeq(); + size_t nsqr = n * n; // use Floyd algorithm to find shortest path between all pairs of taxa - for (k = 0; k < n; k++) - for (i = 0, pos = 0; i < n; i++) - for (j = 0; j < n; j++, pos++) { + for (size_t k = 0; k < n; ++k) { + for (size_t i = 0, pos = 0; i < n; ++i) { + for (size_t j = 0; j < n; ++j, ++pos) { double tmp = dist_mat[i * n + k] + dist_mat[k * n + j]; - if (dist_mat[pos] > tmp) + if (dist_mat[pos] > tmp) { dist_mat[pos] = tmp; + } } + } + } double longest_dist = 0.0; - for (i = 0; i < nsqr; i++) - if (dist_mat[i] > longest_dist) + for (size_t i = 0; i < nsqr; ++i) { + if (dist_mat[i] > longest_dist) { longest_dist = dist_mat[i]; - + } + } return longest_dist; } @@ -3241,12 +3242,12 @@ template double computeDistanceMatrix double hamming = hammingDistance ( unknown, thisSequence, otherSequence , seqLen, frequencyVector, unknownFreq ); - if (0compute_obs_dist; //Use uncorrected (observed) distances - int seqCount = aln->getNSeq(); + size_t seqCount = aln->getNSeq(); bool workToDo = false; cout.precision(6); EX_TRACE("Checking if distances already calculated..."); @@ -3369,9 +3370,11 @@ double PhyloTree::computeDist_Experimental(double *dist_mat, double *var_mat) { } //Todo: Shouldn't this be totalFrequency, rather than // totalFrequencyOfNonConst sites? + double denominator = s.totalFrequencyOfNonConstSites + + s.totalFrequency - aln->num_variant_sites; EX_TRACE("Maximum possible uncorrected length " << ((double)maxDistance / (double)s.totalFrequencyOfNonConstSites) - << " Denominator " << s.totalFrequencyOfNonConstSites); + << " Denominator " << denominator); if ( 256 < s.maxState - s.minState ) { EX_TRACE("Falling back to stock distance calculation"); double longest = computeDist(dist_mat, var_mat); @@ -3387,17 +3390,16 @@ double PhyloTree::computeDist_Experimental(double *dist_mat, double *var_mat) { double longest = computeDistanceMatrix ( params->ls_var_type, static_cast(aln->STATE_UNKNOWN) , s.sequenceMatrix, s.sequenceCount, s.sequenceLength - , s.totalFrequencyOfNonConstSites, frequencies - , aln->num_states, uncorrected, dist_mat, var_mat); + , denominator, frequencies, aln->num_states + , uncorrected, dist_mat, var_mat); EX_TRACE("Longest distance was " << longest); return longest; } double PhyloTree::computeDist(double *dist_mat, double *var_mat) { prepareToComputeDistances(); - int nseqs = aln->getNSeq(); - int pos = 0; - int num_pairs = nseqs * (nseqs - 1) / 2; + size_t nseqs = aln->getNSeq(); + size_t num_pairs = nseqs * (nseqs - 1) / 2; double longest_dist = 0.0; cout.precision(6); double baseTime = getRealTime(); @@ -3405,7 +3407,7 @@ double PhyloTree::computeDist(double *dist_mat, double *var_mat) { int *col_id = new int[num_pairs]; row_id[0] = 0; col_id[0] = 1; - for (pos = 1; pos < num_pairs; pos++) { + for (size_t pos = 1; pos < num_pairs; ++pos) { row_id[pos] = row_id[pos - 1]; col_id[pos] = col_id[pos - 1] + 1; if (col_id[pos] >= nseqs) { @@ -3416,16 +3418,16 @@ double PhyloTree::computeDist(double *dist_mat, double *var_mat) { EX_TRACE("Decided on visit order"); //compute the upper-triangle of distance matrix #ifdef _OPENMP -#pragma omp parallel for private(pos) +#pragma omp parallel for #endif - for (pos = 0; pos < num_pairs; pos++) { + for (size_t pos = 0; pos < num_pairs; ++pos) { int threadNum = omp_get_thread_num(); AlignmentPairwise* processor = distanceProcessors[threadNum]; - int seq1 = row_id[pos]; - int seq2 = col_id[pos]; - int sym_pos = seq1 * nseqs + seq2; + size_t seq1 = row_id[pos]; + size_t seq2 = col_id[pos]; + size_t sym_pos = seq1 * nseqs + seq2; double d2l = var_mat[sym_pos]; // moved here for thread-safe (OpenMP) - dist_mat[sym_pos] = processor->computeDist(seq1, seq2, dist_mat[sym_pos], d2l); + dist_mat[sym_pos] = processor->recomputeDist(seq1, seq2, dist_mat[sym_pos], d2l); if (params->ls_var_type == OLS) var_mat[sym_pos] = 1.0; else if (params->ls_var_type == WLS_PAUPLIN) @@ -3439,9 +3441,9 @@ double PhyloTree::computeDist(double *dist_mat, double *var_mat) { } //cout << (getRealTime()-baseTime) << "s Copying to lower triangle" << endl; //copy upper-triangle into lower-triangle and set diagonal = 0 - for (int seq1 = 0; seq1 < nseqs; seq1++) - for (int seq2 = 0; seq2 <= seq1; seq2++) { - pos = seq1 * nseqs + seq2; + for (size_t seq1 = 0; seq1 < nseqs; ++seq1) + for (size_t seq2 = 0; seq2 <= seq1; ++seq2) { + size_t pos = seq1 * nseqs + seq2; if (seq1 == seq2) { dist_mat[pos] = 0.0; var_mat[pos] = 0.0; @@ -3483,13 +3485,15 @@ double PhyloTree::computeDist(Params ¶ms, Alignment *alignment, double* &dis aln = alignment; if (!dist_mat) { - int n = alignment->getNSeq(); - int nSquared = n*n; - dist_mat = new double[nSquared]; + size_t n = alignment->getNSeq(); + size_t nSquared = n*n; + dist_mat = new double[nSquared]; memset(dist_mat, 0, sizeof(double) * nSquared); - var_mat = new double[nSquared]; + var_mat = new double[nSquared]; + #ifdef _OPENMP #pragma omp parallel for - for (int i = 0; i < nSquared; i++) { + #endif + for (size_t i = 0; i < nSquared; i++) { var_mat[i] = 1.0; } } @@ -3514,23 +3518,26 @@ void PhyloTree::printDistanceFile() { } double PhyloTree::computeObsDist(double *dist_mat) { - int nseqs = aln->getNSeq(); - int pos = 0; + size_t nseqs = aln->getNSeq(); double longest_dist = 0.0; - for (int seq1 = 0; seq1 < nseqs; seq1++) - for (int seq2 = 0; seq2 < nseqs; seq2++, pos++) { + #ifdef _OPENMP + #pragma omp parallel for + #endif + for (size_t seq1 = 0; seq1 < nseqs; ++seq1) { + size_t pos = seq1*nseqs; + for (size_t seq2 = 0; seq2 < nseqs; ++seq2, ++pos) { if (seq1 == seq2) dist_mat[pos] = 0.0; else if (seq2 > seq1) { dist_mat[pos] = aln->computeObsDist(seq1, seq2); } else dist_mat[pos] = dist_mat[seq2 * nseqs + seq1]; - - if (dist_mat[pos] > longest_dist) + if (dist_mat[pos] > longest_dist) { longest_dist = dist_mat[pos]; + } } + } return longest_dist; - //return correctDist(dist_mat); } double PhyloTree::computeObsDist(Params ¶ms, Alignment *alignment, double* &dist_mat) { @@ -3540,8 +3547,10 @@ double PhyloTree::computeObsDist(Params ¶ms, Alignment *alignment, double* & dist_file += ".obsdist"; if (!dist_mat) { - dist_mat = new double[alignment->getNSeq() * alignment->getNSeq()]; - memset(dist_mat, 0, sizeof(double) * alignment->getNSeq() * alignment->getNSeq()); + size_t n = alignment->getNSeq(); + size_t nSquared = n * n; + dist_mat = new double[nSquared]; + memset(dist_mat, 0, sizeof(double) * nSquared); } longest_dist = computeObsDist(dist_mat); return longest_dist; @@ -5501,11 +5510,12 @@ void PhyloTree::computeSeqIdentityAlongTree(Split &sp, Node *node, Node *dad) { } if (!dad) return; // now going along alignment to compute seq identity - int ident = 0, nseqs = aln->getNSeq(); + int ident = 0; + size_t nseqs = aln->getNSeq(); for (Alignment::iterator it = aln->begin(); it != aln->end(); it++) { char state = aln->STATE_UNKNOWN; bool is_const = true; - for (int i = 0; i != nseqs; i++) + for (size_t i = 0; i < nseqs; ++i) { if ((*it)[i] < aln->num_states && sp.containTaxon(i)) { if (state < aln->num_states && state != (*it)[i]) { is_const = false; @@ -5513,8 +5523,10 @@ void PhyloTree::computeSeqIdentityAlongTree(Split &sp, Node *node, Node *dad) { } state = (*it)[i]; } - if (is_const) + } + if (is_const) { ident += it->frequency; + } } ident = (ident*100)/aln->getNSite(); if (node->name == "") @@ -5873,13 +5885,13 @@ void PhyloTree::writeSiteLh(ostream &out, SiteLoglType wsl, int partid) { wsl = WSL_MIXTURE; } } - size_t i, nsites = getAlnNSite(); + size_t nsites = getAlnNSite(); size_t ncat = getNumLhCat(wsl); double *pattern_lh, *pattern_lh_cat; pattern_lh = aligned_alloc(getAlnNPattern()); pattern_lh_cat = aligned_alloc(getAlnNPattern()*ncat); computePatternLikelihood(pattern_lh, NULL, pattern_lh_cat, wsl); - for (i = 0; i < nsites; i++) { + for (size_t i = 0; i < nsites; ++i) { if (partid >= 0) out << partid << "\t"; size_t ptn = aln->getPatternID(i); @@ -5904,15 +5916,14 @@ void PhyloTree::writeSiteRates(ostream &out, bool bayes, int partid) { optimizePatternRates(pattern_rates); if (pattern_rates.empty()) return; - int nsite = aln->getNSite(); - int i; + size_t nsite = aln->getNSite(); out.setf(ios::fixed,ios::floatfield); out.precision(5); //cout << __func__ << endl; IntVector count; count.resize(ncategory, 0); - for (i = 0; i < nsite; i++) { + for (size_t i = 0; i < nsite; ++i) { int ptn = aln->getPatternID(i); if (partid >= 0) out << partid << "\t"; @@ -5939,7 +5950,7 @@ void PhyloTree::writeSiteRates(ostream &out, bool bayes, int partid) { } if (bayes) { cout << "Empirical proportions for each category:"; - for (i = 0; i < count.size(); i++) + for (size_t i = 0; i < count.size(); ++i) cout << " " << ((double)count[i])/nsite; cout << endl; } diff --git a/tree/phylotree.h b/tree/phylotree.h index 6422a5298..61f1052d6 100644 --- a/tree/phylotree.h +++ b/tree/phylotree.h @@ -541,14 +541,14 @@ class PhyloTree : public MTree, public Optimization, public CheckpointFactory { /** * @return number of alignment patterns */ - virtual int getAlnNPattern() { + virtual size_t getAlnNPattern() { return aln->getNPattern(); } /** * @return number of alignment sites */ - virtual int getAlnNSite() { + virtual size_t getAlnNSite() { return aln->getNSite(); } diff --git a/tree/phylotreemixlen.cpp b/tree/phylotreemixlen.cpp index 746ade92f..3d44e7c0a 100644 --- a/tree/phylotreemixlen.cpp +++ b/tree/phylotreemixlen.cpp @@ -240,7 +240,7 @@ void PhyloTreeMixlen::initializeMixlen(double tolerance, bool write_info) { } // optimize rate model - double tree_lh = relative_rate->optimizeParameters(tolerance); + /*double tree_lh =*/ (void) relative_rate->optimizeParameters(tolerance); // model_factory->optimizeParameters(params->fixed_branch_length, false, tolerance); @@ -386,7 +386,7 @@ void PhyloTreeMixlen::optimizeOneBranch(PhyloNode *node1, PhyloNode *node2, bool upper_bound[i] = params->max_branch_length; } - double score = minimizeNewtonMulti(lower_bound, variables, upper_bound, params->min_branch_length, mixlen); + /*double score =*/ (void) minimizeNewtonMulti(lower_bound, variables, upper_bound, params->min_branch_length, mixlen); for (i = 0; i < mixlen; i++) { current_it->setLength(i, variables[i]); current_it_back->setLength(i, variables[i]); @@ -886,7 +886,7 @@ void PhyloTreeMixlen::computeFuncDerv(double value, double &df, double &ddf) { prob_const = 1.0 - prob_const; double df_frac = df_const / prob_const; double ddf_frac = ddf_const / prob_const; - int nsites = aln->getNSite(); + size_t nsites = aln->getNSite(); df += nsites * df_frac; ddf += nsites *(ddf_frac + df_frac*df_frac); } diff --git a/tree/phylotreepars.cpp b/tree/phylotreepars.cpp index b1a2b1131..32ce1a515 100644 --- a/tree/phylotreepars.cpp +++ b/tree/phylotreepars.cpp @@ -248,7 +248,6 @@ int PhyloTree::computeParsimonyBranchFast(PhyloNeighbor *dad_branch, PhyloNode * computePartialParsimonyFast(dad_branch, dad); if ((node_branch->partial_lh_computed & 2) == 0) computePartialParsimonyFast(node_branch, node); - int site; int nsites = (aln->num_parsimony_sites + UINT_BITS-1) / UINT_BITS; int nstates = aln->getMaxNumStates(); @@ -261,9 +260,9 @@ int PhyloTree::computeParsimonyBranchFast(PhyloNeighbor *dad_branch, PhyloNode * switch (nstates) { case 4: #ifdef _OPENMP - #pragma omp parallel for private (site) reduction(+: score) if(nsites>200) + #pragma omp parallel for reduction(+: score) if(nsites>200) #endif - for (site = 0; site < nsites; site++) { + for (int site = 0; site < nsites; ++site) { size_t offset = 4*site; UINT *x = dad_branch->partial_pars + offset; UINT *y = node_branch->partial_pars + offset; @@ -278,10 +277,10 @@ int PhyloTree::computeParsimonyBranchFast(PhyloNeighbor *dad_branch, PhyloNode * break; default: #ifdef _OPENMP - #pragma omp parallel for private (site) reduction(+: score) if(nsites > 800/nstates) + #pragma omp parallel for reduction(+: score) if(nsites > 800/nstates) #endif - for (site = 0; site < nsites; site++) { - size_t offset = nstates*site; + for (int site = 0; site < nsites; ++site) { + size_t offset = nstates * site; UINT *x = dad_branch->partial_pars + offset; UINT *y = node_branch->partial_pars + offset; int i; @@ -902,9 +901,9 @@ int PhyloTree::computeParsimonyBranchSankoff(PhyloNeighbor *dad_branch, PhyloNod void PhyloTree::create3TaxonTree(IntVector &taxon_order, int *rand_stream) { freeNode(); - int nseq = aln->getNSeq(); + size_t nseq = aln->getNSeq(); taxon_order.resize(nseq); - for (int i = 0; i < nseq; i++) + for (size_t i = 0; i < nseq; ++i) taxon_order[i] = i; // randomize the addition order my_random_shuffle(taxon_order.begin(), taxon_order.end(), rand_stream); @@ -912,7 +911,7 @@ void PhyloTree::create3TaxonTree(IntVector &taxon_order, int *rand_stream) { root = newNode(nseq); // create star tree - for (leafNum = 0; leafNum < 3; leafNum++) { + for (int leafNum = 0; leafNum < 3; ++leafNum) { if (leafNum < 3 && verbose_mode >= VB_MAX) cout << "Add " << aln->getSeqName(taxon_order[leafNum]) << " to the tree" << endl; Node *new_taxon = newNode(taxon_order[leafNum], aln->getSeqName(taxon_order[leafNum]).c_str()); @@ -954,7 +953,7 @@ void PhyloTree::copyConstraintTree(MTree *tree, IntVector &taxon_order, int *ran (*it)->id = aln->getNSeq() + (it - nodes.begin()); // add the remaining taxa - for (int i = 0; i < aln->getNSeq(); i++) + for (size_t i = 0; i < aln->getNSeq(); ++i) if (!pushed[i]) { taxon_order.push_back(i); } @@ -1035,7 +1034,7 @@ void PhyloTree::insertNode2Branch(Node* added_node, Node* target_node, Node* tar int PhyloTree::computeParsimonyTree(const char *out_prefix, Alignment *alignment, int *rand_stream) { aln = alignment; - int nseq = aln->getNSeq(); + size_t nseq = aln->getNSeq(); if (nseq < 3) outError(ERR_FEW_TAXA); diff --git a/tree/phylotreesse.cpp b/tree/phylotreesse.cpp index 85197d59e..1d300594a 100644 --- a/tree/phylotreesse.cpp +++ b/tree/phylotreesse.cpp @@ -44,7 +44,7 @@ void PhyloTree::setNumThreads(int num_threads) { if (!isSuperTree() && aln && num_threads > 1 && num_threads > aln->getNPattern()/8) { outWarning(convertIntToString(num_threads) + " threads for alignment length " + convertIntToString(aln->getNPattern()) + " will slow down analysis"); - num_threads = max(aln->getNPattern()/8,1); + num_threads = max(aln->getNPattern()/8,1UL); } this->num_threads = num_threads; } @@ -257,7 +257,7 @@ void PhyloTree::computeTipPartialLikelihood() { // ModelSet *models = (ModelSet*)model; size_t nptn = aln->getNPattern(), max_nptn = ((nptn+vector_size-1)/vector_size)*vector_size, tip_block_size = max_nptn * aln->num_states; int nstates = aln->num_states; - int nseq = aln->getNSeq(); + size_t nseq = aln->getNSeq(); ASSERT(vector_size > 0); #ifdef _OPENMP #pragma omp parallel for schedule(static) @@ -1223,7 +1223,7 @@ void PhyloTree::computeLikelihoodDervEigen(PhyloNeighbor *dad_branch, PhyloNode prob_const = 1.0 - prob_const; double df_frac = df_const / prob_const; double ddf_frac = ddf_const / prob_const; - int nsites = aln->getNSite(); + size_t nsites = aln->getNSite(); df += nsites * df_frac; ddf += nsites *(ddf_frac + df_frac*df_frac); } @@ -1515,8 +1515,9 @@ void PhyloTree::computeMarginalAncestralState(PhyloNeighbor *dad_branch, PhyloNo } void PhyloTree::writeMarginalAncestralState(ostream &out, PhyloNode *node, double *ptn_ancestral_prob, int *ptn_ancestral_seq) { - size_t site, nsites = aln->getNSite(), nstates = model->num_states; - for (site = 0; site < nsites; site++) { + size_t nsites = aln->getNSite(); + size_t nstates = model->num_states; + for (size_t site = 0; site < nsites; ++site) { int ptn = aln->getPatternID(site); out << node->name << "\t" << site+1 << "\t"; // if (params->print_ancestral_sequence == AST_JOINT) diff --git a/tree/quartet.cpp b/tree/quartet.cpp index e9e3c5299..3a1f38ce5 100644 --- a/tree/quartet.cpp +++ b/tree/quartet.cpp @@ -1533,8 +1533,7 @@ void PhyloTree::doLikelihoodMapping() { void PhyloTree::reportLikelihoodMapping(ofstream &out) { int64_t resolved, partly, unresolved; int64_t qid; - int leafNum; - leafNum = PhyloTree::aln->getNSeq(); + size_t leafNum = PhyloTree::aln->getNSeq(); // vector lmap_quartet_info; // vector lmap_seq_quartet_info; diff --git a/tree/tinatree.cpp b/tree/tinatree.cpp index 64c530ea3..16cd1d406 100644 --- a/tree/tinatree.cpp +++ b/tree/tinatree.cpp @@ -88,13 +88,13 @@ int TinaTree::computeParsimonyScore() { ASSERT(root && root->isLeaf()); int score = 0; - for (int ptn = 0; ptn < aln->size(); ptn++) + for (size_t ptn = 0; ptn < aln->size(); ++ptn) if (!aln->at(ptn).isConst()) { int states; int ptn_score = computeParsimonyScore(ptn, states); score += ptn_score * (*aln)[ptn].frequency; if (verbose_mode >= VB_MAX) { - for (int seq=0; seq < aln->getNSeq(); seq++) + for (size_t seq=0; seq < aln->getNSeq(); ++seq) cout << aln->convertStateBackStr(aln->at(ptn)[seq]); cout << " " << ptn_score << endl; } diff --git a/utils/MPIHelper.cpp b/utils/MPIHelper.cpp index 8187ebc44..fdd25d54c 100644 --- a/utils/MPIHelper.cpp +++ b/utils/MPIHelper.cpp @@ -61,7 +61,7 @@ int MPIHelper::countSameHost() { // detect if processes are in the same host char host_name[MPI_MAX_PROCESSOR_NAME]; int resultlen; - int pID = MPIHelper::getInstance().getProcessID(); + /*int pID =*/ (void) MPIHelper::getInstance().getProcessID(); MPI_Get_processor_name(host_name, &resultlen); char *host_names; host_names = new char[MPI_MAX_PROCESSOR_NAME * MPIHelper::getInstance().getNumProcesses()]; diff --git a/utils/bionj2.cpp b/utils/bionj2.cpp index 6ea81b4d7..c9524253f 100644 --- a/utils/bionj2.cpp +++ b/utils/bionj2.cpp @@ -83,7 +83,6 @@ #include //sequence names stored as std::string #include #include //for std::istream -#include //for boost::scoped_array #include //for Vec4d and Vec4db vector classes @@ -298,7 +297,7 @@ template class Matrix return; } try { - size_t w = widthNeededFor(rank); + size_t w = widthNeededFor(rank); n = rank; shrink_n = (rank+rank)/3; if (shrink_n<100) shrink_n=0; @@ -791,7 +790,6 @@ template class BIONJMatrix : public NJMatrix { T mu = 1.0 - lambda; T dCorrection = - lambda * aLength - mu * bLength; T vCorrection = - lambda * mu * Vab; - T replacementRowTotal = 0; #pragma omp parallel for for (int i=0; igetNSeq(); - int nsite = tree.aln->getNSite(); + size_t nseq = tree.aln->getNSeq(); + size_t nsite = tree.aln->getNSite(); WHT_setAlignmentSize(nseq, nsite); WHT_allocateMemory(); - for (i = 0; i < nseq; i++) - for (j = 0; j < nsite; j++) + for (size_t i = 0; i < nseq; i++) + for (size_t j = 0; j < nsite; j++) WHT_setSequenceSite(i, j, (*tree.aln)[tree.aln->getPatternID(j)][i]); - for (i = 0; i < nseq; i++) + for (size_t i = 0; i < nseq; i++) WHT_setSequenceName(i, tree.aln->getSeqName(i).c_str()); double gamma_shape = tree.getModelFactory()->site_rate->getGammaShape(); if (gamma_shape == 0) gamma_shape = 100.0;