diff --git a/main/phyloanalysis.cpp b/main/phyloanalysis.cpp index 638b72a4..00fcdaff 100644 --- a/main/phyloanalysis.cpp +++ b/main/phyloanalysis.cpp @@ -3730,14 +3730,15 @@ void assignBranchSupportNew(Params ¶ms) { PhyloTree tree; tree.readTree(params.second_tree, params.is_rooted); cout << tree.leafNum << " taxa and " << tree.branchNum << " branches" << endl; - tree.assignBranchSupport(params.user_file); + map branch_supports; + tree.assignBranchSupport(params.user_file, branch_supports); if (params.site_concordance) { if (!params.aln_file) outError("Please provide an alignment"); Alignment *aln = new Alignment(params.aln_file, params.sequence_type, params.intype, params.model_name); tree.setAlignment(aln); cout << "Computing site concordance factor..." << endl; - tree.computeSiteConcordanceFactor(); + tree.computeSiteConcordanceFactor(branch_supports); delete aln; } string str = (string)params.second_tree + ".suptree"; @@ -3745,6 +3746,31 @@ void assignBranchSupportNew(Params ¶ms) { cout << "Tree with assigned branch supports written to " << str << endl; if (verbose_mode >= VB_DEBUG) tree.drawTree(cout); + ofstream out; + string filename = (string)params.second_tree + ".support"; + out.open(filename.c_str()); + out << "# ID: Node ID" << endl + << "# Length: Branch length" << endl + << "# Value: Current node value (e.g. bootstrap)" << endl + << "# geneCF: Gene concordance factor" << endl + << "# geneN: Number of trees decisive for the branch" << endl; + if (params.site_concordance) + out << "# siteCF: Site concordance factor averaged over " << params.site_concordance << " quartets" << endl + << "# siteN: number of informative sites averaged over " << params.site_concordance << " quartets" << endl; + out << "ID\tLength\tValue\tgeneCF\tgeneN"; + if (params.site_concordance) + out << "\tsiteCF\tsiteN"; + out << endl; + for (auto brit = branch_supports.begin(); brit != branch_supports.end(); brit++) { + out << brit->second.id << '\t' << brit->second.length << '\t' << brit->second.name << '\t' + << brit->second.geneCF << '\t' << brit->second.geneN; + if (params.site_concordance) + out << '\t' << brit->second.siteCF << '\t' + << brit->second.siteN; + out << endl; + } + out.close(); + cout << "Concordance factors printed to " << filename << endl; } diff --git a/tree/discordance.cpp b/tree/discordance.cpp index 5ab262f4..864ab8c7 100644 --- a/tree/discordance.cpp +++ b/tree/discordance.cpp @@ -7,13 +7,21 @@ #include "phylotree.h" -void PhyloTree::computeSiteConcordanceFactor() { +void PhyloTree::computeSiteConcordanceFactor(map &branch_supports) { Branches branches; getInnerBranches(branches); for (auto it = branches.begin(); it != branches.end(); it++) { - double sup = computeSiteConcordanceFactor(it->second); + double num_sites; + double sup = computeSiteConcordanceFactor(it->second, num_sites); string sup_str = convertDoubleToString(round(sup*1000)/10); Node *node = it->second.second; + + auto brsup = branch_supports.find(node->id); + ASSERT(brsup != branch_supports.end()); + brsup->second.siteCF = sup; + brsup->second.siteN = num_sites; + brsup->second.length = node->findNeighbor(it->second.first)->length; + if (Params::getInstance().newick_extended_format) { if (node->name.empty() || node->name.back() != ']') { node->name += "[&sCF=" + sup_str + "]"; @@ -27,7 +35,7 @@ void PhyloTree::computeSiteConcordanceFactor() { } } -double PhyloTree::computeSiteConcordanceFactor(Branch &branch) { +double PhyloTree::computeSiteConcordanceFactor(Branch &branch, double &num_sites) { vector taxa; taxa.resize(4); @@ -49,6 +57,7 @@ double PhyloTree::computeSiteConcordanceFactor(Branch &branch) { } double sum_support = 0.0; + int sum_sites = 0; for (int i = 0; i < Params::getInstance().site_concordance; i++) { int j; // get a random quartet @@ -76,8 +85,10 @@ double PhyloTree::computeSiteConcordanceFactor(Branch &branch) { support[2] += pat->frequency; } int sum = support[0] + support[1] + support[2]; + sum_sites += sum; if (sum > 0) sum_support += ((double)support[0]) / sum; } + num_sites = (double)sum_sites / Params::getInstance().site_concordance; return sum_support / Params::getInstance().site_concordance; } diff --git a/tree/mtree.cpp b/tree/mtree.cpp index df6b637f..e305c318 100644 --- a/tree/mtree.cpp +++ b/tree/mtree.cpp @@ -1983,20 +1983,20 @@ void MTree::extractQuadSubtrees(vector &subtrees, Node *node, Node *dad) } -void MTree::assignBranchSupport(const char *trees_file) { +void MTree::assignBranchSupport(const char *trees_file, map &branch_supports) { cout << "Reading input trees file " << trees_file << endl; try { ifstream in; in.exceptions(ios::failbit | ios::badbit); in.open(trees_file); - assignBranchSupport(in); + assignBranchSupport(in, branch_supports); in.close(); } catch (ios::failure) { outError(ERR_READ_INPUT, trees_file); } } -void MTree::assignBranchSupport(istream &in) { +void MTree::assignBranchSupport(istream &in, map &branch_supports) { SplitGraph mysg; NodeVector mynodes; convertSplits(mysg, &mynodes, root->neighbors[0]->node); @@ -2100,6 +2100,13 @@ void MTree::assignBranchSupport(istream &in) { for (int i = 0; i < mysg.size(); i++) if (!mynodes[i]->isLeaf()) { + BranchSupportInfo brsup; + brsup.id = mynodes[i]->id; + brsup.name = mynodes[i]->name; + brsup.geneCF = mysg[i]->getWeight()/decisive_counts[i]; + brsup.geneN = decisive_counts[i]; + branch_supports[brsup.id] = brsup; + stringstream tmp; if (mysg[i]->getWeight() == 0.0) tmp << "0"; diff --git a/tree/mtree.h b/tree/mtree.h index 31d4dad7..ca98e82b 100644 --- a/tree/mtree.h +++ b/tree/mtree.h @@ -36,6 +36,24 @@ const char BRANCH_LENGTH_SEPARATOR = '/'; class SplitGraph; class MTreeSet; +class BranchSupportInfo { +public: + BranchSupportInfo() { + id = 0; + length = 0.0; + geneCF = 0.0; + geneN = 0; + siteCF = siteN = 0.0; + } + int id; // branch id + double length; + string name; + double geneCF; // gene concordance factor + int geneN; // number of gene trees that is decisive + double siteCF; // site concordance factor + double siteN; // number of sites that is decisive +}; + /** General-purposed tree @author BUI Quang Minh, Steffen Klaere, Arndt von Haeseler @@ -711,9 +729,9 @@ class MTree { * Work fine also when the trees do not have the same taxon set. * @param trees_file set of trees in NEWICK */ - void assignBranchSupport(const char *trees_file); + void assignBranchSupport(const char *trees_file, map &branch_supports); - void assignBranchSupport(istream &in); + void assignBranchSupport(istream &in, map &branch_supports); /** * compute robinson foulds distance between this tree and a set of trees. diff --git a/tree/phylotree.h b/tree/phylotree.h index 6190c578..55385774 100644 --- a/tree/phylotree.h +++ b/tree/phylotree.h @@ -1778,13 +1778,14 @@ class PhyloTree : public MTree, public Optimization, public CheckpointFactory { /** compute site concordance factor and assign node names */ - void computeSiteConcordanceFactor(); + void computeSiteConcordanceFactor(map &branch_supports); /** compute site concordance factor and assign node names @return sCF value for this branch + @num_sites average number of quartet informative sites for the branch */ - virtual double computeSiteConcordanceFactor(Branch &branch); + virtual double computeSiteConcordanceFactor(Branch &branch, double &num_sites); /**************************************************************************** Collapse stable (highly supported) clades by one representative diff --git a/utils/tools.cpp b/utils/tools.cpp index 719356a0..df0f23e7 100644 --- a/utils/tools.cpp +++ b/utils/tools.cpp @@ -1487,7 +1487,7 @@ void parseArg(int argc, char *argv[], Params ¶ms) { params.support_tag = argv[cnt]; continue; } - if (strcmp(argv[cnt], "-sup2") == 0) { + if (strcmp(argv[cnt], "-sup2") == 0 || strcmp(argv[cnt], "--support") == 0) { cnt++; if (cnt >= argc) throw "Use -sup2 ";