Skip to content

Commit

Permalink
version 2.2.2.7 fixing -scfl with gappy alignments. The previous -scf…
Browse files Browse the repository at this point in the history
…l now becomes -scflg.
  • Loading branch information
bqminh committed May 30, 2023
1 parent 2f72299 commit bd3468c
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 7 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ endif()
# The version number.
set (iqtree_VERSION_MAJOR 2)
set (iqtree_VERSION_MINOR 2)
set (iqtree_VERSION_PATCH ".2.6")
set (iqtree_VERSION_PATCH ".2.7")

option(BUILD_SHARED_LIBS "Build Shared Libraries" OFF)

Expand Down
41 changes: 35 additions & 6 deletions tree/discordance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,10 @@ void PhyloTree::computeSubtreeAncestralState(PhyloNeighbor *dad_branch, PhyloNod
if (state_prob[i] > state_prob[state_best])
state_best = i;
}
if (params->ancestral_site_concordance == 2 && sum > 1.0) {
// UPDATE for sCLF: ignoring sites where one subtree shows just gap/ambiguity
state_best = aln->STATE_UNKNOWN;
}
sum = 1.0/sum;
for (size_t i = 0; i < nstates; i++) {
state_prob[i] *= sum;
Expand Down Expand Up @@ -441,6 +445,8 @@ void PhyloTree::computeAncestralSiteConcordance(Branch &branch, int nquartets, i
// vector<Neighbor*> second_nei;
vector<double*> first_ancestral_prob;
vector<double*> second_ancestral_prob;
vector<int*> first_ancestral_seq;
vector<int*> second_ancestral_seq;

// extract two nodes adjecent to branch.first
FOR_NEIGHBOR_DECLARE(branch.first, branch.second, it) {
Expand All @@ -449,18 +455,20 @@ void PhyloTree::computeAncestralSiteConcordance(Branch &branch, int nquartets, i
return;
// first_nei.push_back(*it);
double *ptn_ancestral_prob = newAncestralProb();
int *ptn_ancestral_seq = aligned_alloc<int>(getAlnNPattern());
if (params->ancestral_site_concordance == 1)
computeMarginalAncestralState((PhyloNeighbor*)(*it), (PhyloNode*)branch.first,
ptn_ancestral_prob, marginal_ancestral_seq);
ptn_ancestral_prob, ptn_ancestral_seq);
else
computeSubtreeAncestralState((PhyloNeighbor*)(*it), (PhyloNode*)branch.first,
ptn_ancestral_prob, marginal_ancestral_seq);
ptn_ancestral_prob, ptn_ancestral_seq);
// PhyloNeighbor* nei = (PhyloNeighbor*)(*it)->node->findNeighbor(branch.first);
// computeMarginalAncestralState(nei, (PhyloNode*)(*it)->node,
// ptn_ancestral_prob, marginal_ancestral_seq);
if (verbose_mode >= VB_MED)
writeMarginalAncestralState(cout, (PhyloNode*)((*it)->node), ptn_ancestral_prob, marginal_ancestral_seq);
writeMarginalAncestralState(cout, (PhyloNode*)((*it)->node), ptn_ancestral_prob, ptn_ancestral_seq);
first_ancestral_prob.push_back(ptn_ancestral_prob);
first_ancestral_seq.push_back(ptn_ancestral_seq);
}

// extract the taxa from the two right subtrees
Expand All @@ -470,18 +478,20 @@ void PhyloTree::computeAncestralSiteConcordance(Branch &branch, int nquartets, i
return;
// second_nei.push_back(*it);
double *ptn_ancestral_prob = newAncestralProb();
int *ptn_ancestral_seq = aligned_alloc<int>(getAlnNPattern());
if (params->ancestral_site_concordance == 1)
computeMarginalAncestralState((PhyloNeighbor*)(*it), (PhyloNode*)branch.second,
ptn_ancestral_prob, marginal_ancestral_seq);
ptn_ancestral_prob, ptn_ancestral_seq);
else
computeSubtreeAncestralState((PhyloNeighbor*)(*it), (PhyloNode*)branch.second,
ptn_ancestral_prob, marginal_ancestral_seq);
ptn_ancestral_prob, ptn_ancestral_seq);
// PhyloNeighbor* nei = (PhyloNeighbor*)(*it)->node->findNeighbor(branch.second);
// computeMarginalAncestralState(nei, (PhyloNode*)(*it)->node,
// ptn_ancestral_prob, marginal_ancestral_seq);
if (verbose_mode >= VB_MED)
writeMarginalAncestralState(cout, (PhyloNode*)((*it)->node), ptn_ancestral_prob, marginal_ancestral_seq);
writeMarginalAncestralState(cout, (PhyloNode*)((*it)->node), ptn_ancestral_prob, ptn_ancestral_seq);
second_ancestral_prob.push_back(ptn_ancestral_prob);
second_ancestral_seq.push_back(ptn_ancestral_seq);
}

ASSERT(first_ancestral_prob.size() >= 2);
Expand Down Expand Up @@ -517,10 +527,18 @@ void PhyloTree::computeAncestralSiteConcordance(Branch &branch, int nquartets, i
if (isSuperTree()) {
PhyloSuperTree* stree = (PhyloSuperTree*)this;
size_t start_pos = 0;
size_t start_pos_seq = 0;
for (auto it = stree->begin(); it != stree->end(); it++) {
size_t nptn = (*it)->getAlnNPattern();
size_t nstates = (*it)->model->num_states;
for (size_t ptn = 0; ptn < nptn; ptn++) {
// FIX sCFL: ignore sites if at least one subtree shows all gap/ambiguitity
if (first_ancestral_seq[first_id0][start_pos_seq + ptn] >= nstates ||
first_ancestral_seq[first_id1][start_pos_seq + ptn] >= nstates ||
second_ancestral_seq[second_id0][start_pos_seq + ptn] >= nstates ||
second_ancestral_seq[second_id1][start_pos_seq + ptn] >= nstates) {
continue;
}
StateType first_state0 = random_int_multinomial(nstates, first_ancestral_prob[first_id0]+ptn*nstates+start_pos, rstream);
StateType first_state1 = random_int_multinomial(nstates, first_ancestral_prob[first_id1]+ptn*nstates+start_pos, rstream);
StateType second_state0 = random_int_multinomial(nstates, second_ancestral_prob[second_id0]+ptn*nstates+start_pos, rstream);
Expand All @@ -533,11 +551,18 @@ void PhyloTree::computeAncestralSiteConcordance(Branch &branch, int nquartets, i
support[2] += (*it)->aln->at(ptn).frequency;
}
start_pos += nptn*nstates;
start_pos_seq += nptn;
}
} else {
size_t nptn = getAlnNPattern();
size_t nstates = model->num_states;
for (size_t ptn = 0; ptn < nptn; ptn++) {
if (first_ancestral_seq[first_id0][ptn] >= nstates ||
first_ancestral_seq[first_id1][ptn] >= nstates ||
second_ancestral_seq[second_id0][ptn] >= nstates ||
second_ancestral_seq[second_id1][ptn] >= nstates) {
continue;
}
StateType first_state0 = random_int_multinomial(nstates, first_ancestral_prob[first_id0]+ptn*nstates, rstream);
StateType first_state1 = random_int_multinomial(nstates, first_ancestral_prob[first_id1]+ptn*nstates, rstream);
StateType second_state0 = random_int_multinomial(nstates, second_ancestral_prob[second_id0]+ptn*nstates, rstream);
Expand Down Expand Up @@ -594,6 +619,10 @@ void PhyloTree::computeAncestralSiteConcordance(Branch &branch, int nquartets, i
aligned_free(*it2);
for (auto it1 = first_ancestral_prob.rbegin(); it1 != first_ancestral_prob.rend(); it1++)
aligned_free(*it1);
for (auto it3 = second_ancestral_seq.rbegin(); it3 != second_ancestral_seq.rend(); it3++)
aligned_free(*it3);
for (auto it4 = first_ancestral_seq.rbegin(); it4 != first_ancestral_seq.rend(); it4++)
aligned_free(*it4);
}


Expand Down
14 changes: 14 additions & 0 deletions utils/tools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2021,6 +2021,7 @@ void parseArg(int argc, char *argv[], Params &params) {
continue;
}
if (strcmp(argv[cnt], "--bscf") == 0 || strcmp(argv[cnt], "--scfl") == 0) {
// UPDATE: sCFL now ignore subtrees with all gaps for a particular site
if (params.consensus_type == CT_ASSIGN_SUPPORT_EXTENDED)
throw "Do not specify --scf or --gcf with --scfl";
params.ancestral_site_concordance = 2;
Expand All @@ -2032,6 +2033,19 @@ void parseArg(int argc, char *argv[], Params &params) {
throw "Positive --scfl please";
continue;
}
if (strcmp(argv[cnt], "--scflg") == 0) {
// OUTDATED: with gaps for historical reason
if (params.consensus_type == CT_ASSIGN_SUPPORT_EXTENDED)
throw "Do not specify --scf or --gcf with --scflg";
params.ancestral_site_concordance = 3;
cnt++;
if (cnt >= argc)
throw "Use --scflg NUM_QUARTETS";
params.site_concordance = convert_int(argv[cnt]);
if (params.site_concordance < 1)
throw "Positive --scflg please";
continue;
}
if (strcmp(argv[cnt], "--scf1") == 0) {
throw "--scf1 option does not exist. Do you mean --scfl?";
continue;
Expand Down

0 comments on commit bd3468c

Please sign in to comment.