From 0bc1016476530464b5ada0f5817e3dbc158b23ef Mon Sep 17 00:00:00 2001 From: minh Date: Thu, 14 Jan 2016 10:24:59 +0100 Subject: [PATCH] adjust min tree scaling (bug reported by Pete Hosner) --- model/modelfactory.h | 2 +- model/ratefree.cpp | 3 +-- phylosupertree.cpp | 6 +++--- phylosupertreeplen.cpp | 38 ++++++++++++++++++++++++-------------- phylosupertreeplen.h | 6 ++++++ phylotree.cpp | 8 ++++---- phylotree.h | 2 +- 7 files changed, 40 insertions(+), 25 deletions(-) diff --git a/model/modelfactory.h b/model/modelfactory.h index 7a0d455a..6baf4273 100644 --- a/model/modelfactory.h +++ b/model/modelfactory.h @@ -69,7 +69,7 @@ class ModelFactory : public unordered_map, public Optimization */ //string getModelName(); - void writeInfo(ostream &out); + virtual void writeInfo(ostream &out) {} /** Start to store transition matrix for efficiency diff --git a/model/ratefree.cpp b/model/ratefree.cpp index e774c127..91fc0fd2 100644 --- a/model/ratefree.cpp +++ b/model/ratefree.cpp @@ -533,9 +533,8 @@ double RateFree::optimizeWithEM() { for (ptn = 0; ptn < nptn; ptn++) tree->ptn_freq[ptn] = this_lk_cat[ptn*nmix]; double scaling = rates[c]; - double max_scaling = max(scaling, 1.0/prop[c]); tree->scaleLength(scaling); - tree->optimizeTreeLengthScaling(scaling, max_scaling, 0.001); + tree->optimizeTreeLengthScaling(MIN_PROP, scaling, 1.0/prop[c], 0.001); converged = converged && (fabs(rates[c] - scaling) < 1e-4); rates[c] = scaling; sum += prop[c] * rates[c]; diff --git a/phylosupertree.cpp b/phylosupertree.cpp index 44d207ec..5df6ba6f 100644 --- a/phylosupertree.cpp +++ b/phylosupertree.cpp @@ -70,7 +70,7 @@ void PhyloSuperTree::readPartition(Params ¶ms) { getline(in, info.position_spec); trimString(info.sequence_type); cout << endl << "Reading partition " << info.name << " (model=" << info.model_name << ", aln=" << - info.aln_file << ", seq=" << info.sequence_type << ", pos=" << info.position_spec << ") ..." << endl; + info.aln_file << ", seq=" << info.sequence_type << ", pos=" << ((info.position_spec.length() >= 20) ? info.position_spec.substr(0,20)+"..." : info.position_spec) << ") ..." << endl; //info.mem_ptnlh = NULL; info.nniMoves[0].ptnlh = NULL; @@ -182,7 +182,7 @@ void PhyloSuperTree::readPartitionRaxml(Params ¶ms) { outError("Please specify alignment positions for partition" + info.name); std::replace(info.position_spec.begin(), info.position_spec.end(), ',', ' '); - cout << "Reading partition " << info.name << " (model=" << info.model_name << ", seq=" << info.sequence_type << ", pos=" << info.position_spec << ") ..." << endl; + cout << "Reading partition " << info.name << " (model=" << info.model_name << ", seq=" << info.sequence_type << ", pos=" << ((info.position_spec.length() >= 20) ? info.position_spec.substr(0,20)+"..." : info.position_spec) << ") ..." << endl; //info.mem_ptnlh = NULL; info.nniMoves[0].ptnlh = NULL; @@ -265,7 +265,7 @@ void PhyloSuperTree::readPartitionNexus(Params ¶ms) { info.position_spec = (*it)->position_spec; trimString(info.sequence_type); cout << endl << "Reading partition " << info.name << " (model=" << info.model_name << ", aln=" << - info.aln_file << ", seq=" << info.sequence_type << ", pos=" << info.position_spec << ") ..." << endl; + info.aln_file << ", seq=" << info.sequence_type << ", pos=" << ((info.position_spec.length() >= 20) ? info.position_spec.substr(0,20)+"..." : info.position_spec) << ") ..." << endl; if (info.sequence_type != "" && Alignment::getSeqType(info.sequence_type.c_str()) == SEQ_UNKNOWN) outError("Unknown sequence type " + info.sequence_type); //info.mem_ptnlh = NULL; diff --git a/phylosupertreeplen.cpp b/phylosupertreeplen.cpp index b6530cbe..75356db7 100644 --- a/phylosupertreeplen.cpp +++ b/phylosupertreeplen.cpp @@ -121,6 +121,11 @@ double PartitionModelPlen::optimizeParameters(bool fixed_len, bool write_info, d } if (verbose_mode >= VB_MED) cout << "LnL after optimizing individual models: " << cur_lh << endl; + if (cur_lh <= tree_lh - 1.0) { + // more info for ASSERTION + writeInfo(cout); + tree->printTree(cout, WT_BR_LEN+WT_NEWLINE); + } assert(cur_lh > tree_lh - 1.0 && "individual model opt reduces LnL"); tree->clearAllPartialLH(); @@ -129,11 +134,7 @@ double PartitionModelPlen::optimizeParameters(bool fixed_len, bool write_info, d cur_lh = optimizeGeneRate(gradient_epsilon); if (verbose_mode >= VB_MED) { cout << "LnL after optimizing partition-specific rates: " << cur_lh << endl; - cout << "Partition-specific rates: "; - for(int part = 0; part < ntrees; part++){ - cout << " " << tree->part_info[part].part_rate; - } - cout << endl; + writeInfo(cout); } assert(cur_lh > tree_lh - 1.0 && "partition rate opt reduces LnL"); } @@ -156,33 +157,42 @@ double PartitionModelPlen::optimizeParameters(bool fixed_len, bool write_info, d tree_lh = cur_lh; } // cout <<"OPTIMIZE MODEL has finished"<< endl; + if (write_info) + writeInfo(cout); + cout << "Parameters optimization took " << i-1 << " rounds (" << getRealTime()-begin_time << " sec)" << endl << endl; + + return tree_lh; +} + +void PartitionModelPlen::writeInfo(ostream &out) { + PhyloSuperTreePlen *tree = (PhyloSuperTreePlen*)site_rate->getTree(); + int ntrees = tree->size(); if (!tree->fixed_rates) { - cout << "Partition-specific rates: "; + out << "Partition-specific rates: "; for(int part = 0; part < ntrees; part++){ - cout << " " << tree->part_info[part].part_rate; + out << " " << tree->part_info[part].part_rate; } - cout << endl; + out << endl; } - cout << "Parameters optimization took " << i-1 << " rounds (" << getRealTime()-begin_time << " sec)" << endl << endl; - - return tree_lh; } - - + double PartitionModelPlen::optimizeGeneRate(double gradient_epsilon) { PhyloSuperTreePlen *tree = (PhyloSuperTreePlen*)site_rate->getTree(); // BQM 22-05-2015: change to optimize individual rates int i; double score = 0.0; + double nsites = tree->getAlnNSite(); if (tree->part_order.empty()) tree->computePartitionOrder(); + #ifdef _OPENMP #pragma omp parallel for reduction(+: score) private(i) schedule(dynamic) if(tree->size() >= tree->params->num_threads) #endif for (int j = 0; j < tree->size(); j++) { int i = tree->part_order[j]; - tree->part_info[i].cur_score = tree->at(i)->optimizeTreeLengthScaling(tree->part_info[i].part_rate, max(tree->part_info[i].part_rate, 100.0), gradient_epsilon); + double max_scaling = nsites / tree->at(i)->getAlnNSite(); + tree->part_info[i].cur_score = tree->at(i)->optimizeTreeLengthScaling(1.0/tree->at(i)->getAlnNSite(), tree->part_info[i].part_rate, max_scaling, gradient_epsilon); score += tree->part_info[i].cur_score; } // now normalize the rates diff --git a/phylosupertreeplen.h b/phylosupertreeplen.h index aab6642b..39a7f67c 100644 --- a/phylosupertreeplen.h +++ b/phylosupertreeplen.h @@ -106,6 +106,12 @@ class PartitionModelPlen : public PartitionModel virtual int getNParameters(); virtual int getNDim(); + /** + write information + @param out output stream + */ + virtual void writeInfo(ostream &out); + /** optimize model parameters and tree branch lengths @param fixed_len TRUE to fix branch lengths, default is false diff --git a/phylotree.cpp b/phylotree.cpp index b2a9fe39..82c19b0d 100644 --- a/phylotree.cpp +++ b/phylotree.cpp @@ -3068,16 +3068,16 @@ void PhyloTree::computeLikelihoodDervNaive(PhyloNeighbor *dad_branch, PhyloNode Branch length optimization by maximum likelihood ****************************************************************************/ -const double MIN_TREE_LENGTH_SCALE = 0.001; -const double MAX_TREE_LENGTH_SCALE = 100.0; +//const double MIN_TREE_LENGTH_SCALE = 0.001; +//const double MAX_TREE_LENGTH_SCALE = 100.0; const double TOL_TREE_LENGTH_SCALE = 0.001; -double PhyloTree::optimizeTreeLengthScaling(double &scaling, double max_scaling, double gradient_epsilon) { +double PhyloTree::optimizeTreeLengthScaling(double min_scaling, double &scaling, double max_scaling, double gradient_epsilon) { is_opt_scaling = true; current_scaling = scaling; double negative_lh, ferror; - scaling = minimizeOneDimen(min(current_scaling/2.0, MIN_TREE_LENGTH_SCALE), scaling, max_scaling, max(TOL_TREE_LENGTH_SCALE, gradient_epsilon), &negative_lh, &ferror); + scaling = minimizeOneDimen(min(scaling, min_scaling), scaling, max(max_scaling, scaling), max(TOL_TREE_LENGTH_SCALE, gradient_epsilon), &negative_lh, &ferror); if (scaling != current_scaling) { scaleLength(scaling / current_scaling); current_scaling = scaling; diff --git a/phylotree.h b/phylotree.h index 079ee43d..a2c2480b 100644 --- a/phylotree.h +++ b/phylotree.h @@ -1057,7 +1057,7 @@ class PhyloTree : public MTree, public Optimization { @param gradient_epsilon gradient epsilon @return optimal tree log-likelihood */ - double optimizeTreeLengthScaling(double &scaling, double max_scaling, double gradient_epsilon); + double optimizeTreeLengthScaling(double min_scaling, double &scaling, double max_scaling, double gradient_epsilon); /****************************************************************************