From 0b47ae853b82e1ac17c51193abdcb127dcad9e69 Mon Sep 17 00:00:00 2001 From: minh Date: Mon, 4 Dec 2017 14:01:26 +0100 Subject: [PATCH] version 1.5.6 --- CMakeLists.txt | 2 +- alignment.cpp | 113 ++++++++++++++++++++++++------------------------- 2 files changed, 57 insertions(+), 58 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5cf60248..5049f515 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,7 +50,7 @@ add_definitions(-DIQ_TREE) # The version number. set (iqtree_VERSION_MAJOR 1) set (iqtree_VERSION_MINOR 5) -set (iqtree_VERSION_PATCH "5") +set (iqtree_VERSION_PATCH "6") set(BUILD_SHARED_LIBS OFF) diff --git a/alignment.cpp b/alignment.cpp index 3faaa59e..2908ffc3 100644 --- a/alignment.cpp +++ b/alignment.cpp @@ -119,6 +119,8 @@ int Alignment::checkAbsentStates(string msg) { rare_states += ", "; rare_states += convertStateBackStr(i); } + if (absent_states.size() >= num_states-1) + outError("Only one state is observed in " + msg); if (!absent_states.empty()) cout << "NOTE: State(s) " << absent_states << " not present in " << msg << " and thus removed from Markov process to prevent numerical problems" << endl; if (!rare_states.empty()) @@ -165,67 +167,64 @@ void Alignment::checkSeqName() { computeStateFreq(state_freq); // computeStateFreqPerSequence(freq_per_sequence); countStatePerSequence(count_per_seq); + + int i, df = -1; + for (i = 0; i < num_states; i++) + if (state_freq[i] > 0.0) df++; - /*if (verbose_mode >= VB_MIN)*/ { - int max_len = getMaxSeqNameLength()+1; -// cout << " ID "; -// cout << " Sequence"; - cout.width(max_len+14); - cout << right << "Gap/Ambiguity" << " Composition p-value"<< endl; - int num_problem_seq = 0; - int total_gaps = 0; - cout.precision(2); - int num_failed = 0; - for (int i = 0; i < seq_names.size(); i++) { - int j; - int num_gaps = getNSite() - countProperChar(i); - total_gaps += num_gaps; - double percent_gaps = ((double)num_gaps / getNSite())*100.0; - cout.width(4); - cout << right << i+1 << " "; - cout.width(max_len); - cout << left << seq_names[i] << " "; - cout.width(6); -// cout << num_gaps << " (" << percent_gaps << "%)"; - cout << right << percent_gaps << "%"; - if (percent_gaps > 50) { -// cout << " !!!"; - num_problem_seq++; - } -// cout << "\t" << seq_states[i].size(); + int max_len = getMaxSeqNameLength()+1; + cout.width(max_len+14); + cout << right << "Gap/Ambiguity" << " Composition p-value"<< endl; + int num_problem_seq = 0; + int total_gaps = 0; + cout.precision(2); + int num_failed = 0; + for (i = 0; i < seq_names.size(); i++) { + int j; + int num_gaps = getNSite() - countProperChar(i); + total_gaps += num_gaps; + double percent_gaps = ((double)num_gaps / getNSite())*100.0; + cout.width(4); + cout << right << i+1 << " "; + cout.width(max_len); + cout << left << seq_names[i] << " "; + cout.width(6); + cout << right << percent_gaps << "%"; + if (percent_gaps > 50) { + num_problem_seq++; + } - double chi2 = 0.0; - unsigned sum_count = 0; - for (j = 0; j < num_states; j++) - sum_count += count_per_seq[i*num_states+j]; - double sum_inv = 1.0/sum_count; - for (j = 0; j < num_states; j++) - freq_per_sequence[j] = count_per_seq[i*num_states+j]*sum_inv; - for (j = 0; j < num_states; j++) + double chi2 = 0.0; + unsigned sum_count = 0; + for (j = 0; j < num_states; j++) + sum_count += count_per_seq[i*num_states+j]; + double sum_inv = 1.0/sum_count; + for (j = 0; j < num_states; j++) + freq_per_sequence[j] = count_per_seq[i*num_states+j]*sum_inv; + for (j = 0; j < num_states; j++) + if (state_freq[j] > 0.0) chi2 += (state_freq[j] - freq_per_sequence[j]) * (state_freq[j] - freq_per_sequence[j]) / state_freq[j]; - -// chi2 *= getNSite(); - chi2 *= sum_count; - double pvalue = chi2prob(num_states-1, chi2); - if (pvalue < 0.05) { - cout << " failed "; - num_failed++; - } else - cout << " passed "; - cout.width(9); - cout << right << pvalue*100 << "%"; -// cout << " " << chi2; - cout << endl; - } - if (num_problem_seq) cout << "WARNING: " << num_problem_seq << " sequences contain more than 50% gaps/ambiguity" << endl; - cout << "**** "; - cout.width(max_len+2); - cout << left << " TOTAL "; - cout.width(6); - cout << right << ((double)total_gaps/getNSite())/getNSeq()*100 << "% "; - cout << " " << num_failed << " sequences failed composition chi2 test (p-value<5%; df=" << num_states-1 << ")" << endl; - cout.precision(3); + + chi2 *= sum_count; + double pvalue = chi2prob(df, chi2); + if (pvalue < 0.05) { + cout << " failed "; + num_failed++; + } else + cout << " passed "; + cout.width(9); + cout << right << pvalue*100 << "%"; + cout << endl; } + if (num_problem_seq) cout << "WARNING: " << num_problem_seq << " sequences contain more than 50% gaps/ambiguity" << endl; + cout << "**** "; + cout.width(max_len+2); + cout << left << " TOTAL "; + cout.width(6); + cout << right << ((double)total_gaps/getNSite())/getNSeq()*100 << "% "; + cout << " " << num_failed << " sequences failed composition chi2 test (p-value<5%; df=" << df << ")" << endl; + cout.precision(3); + delete [] count_per_seq; delete [] freq_per_sequence; delete [] state_freq;