Skip to content
This repository has been archived by the owner on Feb 6, 2024. It is now read-only.

Commit

Permalink
adjust min tree scaling (bug reported by Pete Hosner)
Browse files Browse the repository at this point in the history
  • Loading branch information
bqminh committed Jan 14, 2016
1 parent bc8965d commit 0bc1016
Show file tree
Hide file tree
Showing 7 changed files with 40 additions and 25 deletions.
2 changes: 1 addition & 1 deletion model/modelfactory.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class ModelFactory : public unordered_map<int, double*>, public Optimization
*/
//string getModelName();

void writeInfo(ostream &out);
virtual void writeInfo(ostream &out) {}

/**
Start to store transition matrix for efficiency
Expand Down
3 changes: 1 addition & 2 deletions model/ratefree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -533,9 +533,8 @@ double RateFree::optimizeWithEM() {
for (ptn = 0; ptn < nptn; ptn++)
tree->ptn_freq[ptn] = this_lk_cat[ptn*nmix];
double scaling = rates[c];
double max_scaling = max(scaling, 1.0/prop[c]);
tree->scaleLength(scaling);
tree->optimizeTreeLengthScaling(scaling, max_scaling, 0.001);
tree->optimizeTreeLengthScaling(MIN_PROP, scaling, 1.0/prop[c], 0.001);
converged = converged && (fabs(rates[c] - scaling) < 1e-4);
rates[c] = scaling;
sum += prop[c] * rates[c];
Expand Down
6 changes: 3 additions & 3 deletions phylosupertree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ void PhyloSuperTree::readPartition(Params &params) {
getline(in, info.position_spec);
trimString(info.sequence_type);
cout << endl << "Reading partition " << info.name << " (model=" << info.model_name << ", aln=" <<
info.aln_file << ", seq=" << info.sequence_type << ", pos=" << info.position_spec << ") ..." << endl;
info.aln_file << ", seq=" << info.sequence_type << ", pos=" << ((info.position_spec.length() >= 20) ? info.position_spec.substr(0,20)+"..." : info.position_spec) << ") ..." << endl;

//info.mem_ptnlh = NULL;
info.nniMoves[0].ptnlh = NULL;
Expand Down Expand Up @@ -182,7 +182,7 @@ void PhyloSuperTree::readPartitionRaxml(Params &params) {
outError("Please specify alignment positions for partition" + info.name);
std::replace(info.position_spec.begin(), info.position_spec.end(), ',', ' ');

cout << "Reading partition " << info.name << " (model=" << info.model_name << ", seq=" << info.sequence_type << ", pos=" << info.position_spec << ") ..." << endl;
cout << "Reading partition " << info.name << " (model=" << info.model_name << ", seq=" << info.sequence_type << ", pos=" << ((info.position_spec.length() >= 20) ? info.position_spec.substr(0,20)+"..." : info.position_spec) << ") ..." << endl;

//info.mem_ptnlh = NULL;
info.nniMoves[0].ptnlh = NULL;
Expand Down Expand Up @@ -265,7 +265,7 @@ void PhyloSuperTree::readPartitionNexus(Params &params) {
info.position_spec = (*it)->position_spec;
trimString(info.sequence_type);
cout << endl << "Reading partition " << info.name << " (model=" << info.model_name << ", aln=" <<
info.aln_file << ", seq=" << info.sequence_type << ", pos=" << info.position_spec << ") ..." << endl;
info.aln_file << ", seq=" << info.sequence_type << ", pos=" << ((info.position_spec.length() >= 20) ? info.position_spec.substr(0,20)+"..." : info.position_spec) << ") ..." << endl;
if (info.sequence_type != "" && Alignment::getSeqType(info.sequence_type.c_str()) == SEQ_UNKNOWN)
outError("Unknown sequence type " + info.sequence_type);
//info.mem_ptnlh = NULL;
Expand Down
38 changes: 24 additions & 14 deletions phylosupertreeplen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,11 @@ double PartitionModelPlen::optimizeParameters(bool fixed_len, bool write_info, d
}
if (verbose_mode >= VB_MED)
cout << "LnL after optimizing individual models: " << cur_lh << endl;
if (cur_lh <= tree_lh - 1.0) {
// more info for ASSERTION
writeInfo(cout);
tree->printTree(cout, WT_BR_LEN+WT_NEWLINE);
}
assert(cur_lh > tree_lh - 1.0 && "individual model opt reduces LnL");

tree->clearAllPartialLH();
Expand All @@ -129,11 +134,7 @@ double PartitionModelPlen::optimizeParameters(bool fixed_len, bool write_info, d
cur_lh = optimizeGeneRate(gradient_epsilon);
if (verbose_mode >= VB_MED) {
cout << "LnL after optimizing partition-specific rates: " << cur_lh << endl;
cout << "Partition-specific rates: ";
for(int part = 0; part < ntrees; part++){
cout << " " << tree->part_info[part].part_rate;
}
cout << endl;
writeInfo(cout);
}
assert(cur_lh > tree_lh - 1.0 && "partition rate opt reduces LnL");
}
Expand All @@ -156,33 +157,42 @@ double PartitionModelPlen::optimizeParameters(bool fixed_len, bool write_info, d
tree_lh = cur_lh;
}
// cout <<"OPTIMIZE MODEL has finished"<< endl;
if (write_info)
writeInfo(cout);
cout << "Parameters optimization took " << i-1 << " rounds (" << getRealTime()-begin_time << " sec)" << endl << endl;

return tree_lh;
}

void PartitionModelPlen::writeInfo(ostream &out) {
PhyloSuperTreePlen *tree = (PhyloSuperTreePlen*)site_rate->getTree();
int ntrees = tree->size();
if (!tree->fixed_rates) {
cout << "Partition-specific rates: ";
out << "Partition-specific rates: ";
for(int part = 0; part < ntrees; part++){
cout << " " << tree->part_info[part].part_rate;
out << " " << tree->part_info[part].part_rate;
}
cout << endl;
out << endl;
}
cout << "Parameters optimization took " << i-1 << " rounds (" << getRealTime()-begin_time << " sec)" << endl << endl;

return tree_lh;
}



double PartitionModelPlen::optimizeGeneRate(double gradient_epsilon)
{
PhyloSuperTreePlen *tree = (PhyloSuperTreePlen*)site_rate->getTree();
// BQM 22-05-2015: change to optimize individual rates
int i;
double score = 0.0;
double nsites = tree->getAlnNSite();

if (tree->part_order.empty()) tree->computePartitionOrder();

#ifdef _OPENMP
#pragma omp parallel for reduction(+: score) private(i) schedule(dynamic) if(tree->size() >= tree->params->num_threads)
#endif
for (int j = 0; j < tree->size(); j++) {
int i = tree->part_order[j];
tree->part_info[i].cur_score = tree->at(i)->optimizeTreeLengthScaling(tree->part_info[i].part_rate, max(tree->part_info[i].part_rate, 100.0), gradient_epsilon);
double max_scaling = nsites / tree->at(i)->getAlnNSite();
tree->part_info[i].cur_score = tree->at(i)->optimizeTreeLengthScaling(1.0/tree->at(i)->getAlnNSite(), tree->part_info[i].part_rate, max_scaling, gradient_epsilon);
score += tree->part_info[i].cur_score;
}
// now normalize the rates
Expand Down
6 changes: 6 additions & 0 deletions phylosupertreeplen.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,12 @@ class PartitionModelPlen : public PartitionModel
virtual int getNParameters();
virtual int getNDim();

/**
write information
@param out output stream
*/
virtual void writeInfo(ostream &out);

/**
optimize model parameters and tree branch lengths
@param fixed_len TRUE to fix branch lengths, default is false
Expand Down
8 changes: 4 additions & 4 deletions phylotree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3068,16 +3068,16 @@ void PhyloTree::computeLikelihoodDervNaive(PhyloNeighbor *dad_branch, PhyloNode
Branch length optimization by maximum likelihood
****************************************************************************/

const double MIN_TREE_LENGTH_SCALE = 0.001;
const double MAX_TREE_LENGTH_SCALE = 100.0;
//const double MIN_TREE_LENGTH_SCALE = 0.001;
//const double MAX_TREE_LENGTH_SCALE = 100.0;
const double TOL_TREE_LENGTH_SCALE = 0.001;


double PhyloTree::optimizeTreeLengthScaling(double &scaling, double max_scaling, double gradient_epsilon) {
double PhyloTree::optimizeTreeLengthScaling(double min_scaling, double &scaling, double max_scaling, double gradient_epsilon) {
is_opt_scaling = true;
current_scaling = scaling;
double negative_lh, ferror;
scaling = minimizeOneDimen(min(current_scaling/2.0, MIN_TREE_LENGTH_SCALE), scaling, max_scaling, max(TOL_TREE_LENGTH_SCALE, gradient_epsilon), &negative_lh, &ferror);
scaling = minimizeOneDimen(min(scaling, min_scaling), scaling, max(max_scaling, scaling), max(TOL_TREE_LENGTH_SCALE, gradient_epsilon), &negative_lh, &ferror);
if (scaling != current_scaling) {
scaleLength(scaling / current_scaling);
current_scaling = scaling;
Expand Down
2 changes: 1 addition & 1 deletion phylotree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1057,7 +1057,7 @@ class PhyloTree : public MTree, public Optimization {
@param gradient_epsilon gradient epsilon
@return optimal tree log-likelihood
*/
double optimizeTreeLengthScaling(double &scaling, double max_scaling, double gradient_epsilon);
double optimizeTreeLengthScaling(double min_scaling, double &scaling, double max_scaling, double gradient_epsilon);


/****************************************************************************
Expand Down

0 comments on commit 0bc1016

Please sign in to comment.