Skip to content

Commit

Permalink
Fixed erroneous INDEL edits
Browse files Browse the repository at this point in the history
 - generate_ptrs_strs() includes one more ref base than generate_strs()
 - slight improvements to distance summary printing
  • Loading branch information
TimD1 committed Mar 25, 2024
1 parent 60178c8 commit 7aec7a6
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 47 deletions.
83 changes: 44 additions & 39 deletions src/dist.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,8 @@ void generate_ptrs_strs(
std::shared_ptr<ctgVariants> query_vars,
size_t query_clust_beg_idx, size_t query_clust_end_idx,
int beg_pos, int end_pos,
std::shared_ptr<fastaData> ref, const std::string & ctg
std::shared_ptr<fastaData> ref, const std::string & ctg,
int min_qual /* = 0 */
) {

// generate query and ref strings and pointers
Expand All @@ -163,40 +164,42 @@ void generate_ptrs_strs(

if (query_var_idx < query_end_idx &&
ref_pos == query_vars->poss[query_var_idx]) { // start query variant
switch (query_vars->types[query_var_idx]) {
case TYPE_INS:
query_ptrs[PTRS].insert(query_ptrs[PTRS].end(),
query_vars->alts[query_var_idx].size(), ref_str.size()-1);
query_ptrs[FLAGS].insert(query_ptrs[FLAGS].end(),
query_vars->alts[query_var_idx].size(), PTR_VARIANT);
query_ptrs[FLAGS][query_ptrs[FLAGS].size()-1] |= PTR_VAR_END;
query_ptrs[FLAGS][query_ptrs[FLAGS].size() -
query_vars->alts[query_var_idx].size()] |= PTR_VAR_BEG | PTR_INS_LOC;
query_str += query_vars->alts[query_var_idx];
break;
case TYPE_DEL:
ref_ptrs[PTRS].insert(ref_ptrs[PTRS].end(),
query_vars->refs[query_var_idx].size(), query_str.size()-1);
ref_ptrs[FLAGS].insert(ref_ptrs[FLAGS].end(),
query_vars->refs[query_var_idx].size(), PTR_VARIANT);
ref_ptrs[FLAGS][ref_ptrs[FLAGS].size()-1] |= PTR_VAR_END;
ref_ptrs[FLAGS][ref_ptrs[FLAGS].size() -
query_vars->refs[query_var_idx].size()] |= PTR_VAR_BEG;
ref_str += query_vars->refs[query_var_idx];
ref_pos += query_vars->refs[query_var_idx].size();
break;
case TYPE_SUB:
ref_ptrs[PTRS].push_back(query_str.size());
ref_ptrs[FLAGS].push_back(PTR_VARIANT|PTR_VAR_BEG|PTR_VAR_END);
query_ptrs[PTRS].push_back(ref_str.size());
query_ptrs[FLAGS].push_back(PTR_VARIANT|PTR_VAR_BEG|PTR_VAR_END);
ref_str += query_vars->refs[query_var_idx];
query_str += query_vars->alts[query_var_idx];
ref_pos++;
break;
default:
ERROR("Unexpected variant type '%d' in generate_ptrs_strs()",
query_vars->types[query_var_idx]);
if (query_vars->var_quals[query_var_idx] >= min_qual) {
switch (query_vars->types[query_var_idx]) {
case TYPE_INS:
query_ptrs[PTRS].insert(query_ptrs[PTRS].end(),
query_vars->alts[query_var_idx].size(), ref_str.size()-1);
query_ptrs[FLAGS].insert(query_ptrs[FLAGS].end(),
query_vars->alts[query_var_idx].size(), PTR_VARIANT);
query_ptrs[FLAGS][query_ptrs[FLAGS].size()-1] |= PTR_VAR_END;
query_ptrs[FLAGS][query_ptrs[FLAGS].size() -
query_vars->alts[query_var_idx].size()] |= PTR_VAR_BEG | PTR_INS_LOC;
query_str += query_vars->alts[query_var_idx];
break;
case TYPE_DEL:
ref_ptrs[PTRS].insert(ref_ptrs[PTRS].end(),
query_vars->refs[query_var_idx].size(), query_str.size()-1);
ref_ptrs[FLAGS].insert(ref_ptrs[FLAGS].end(),
query_vars->refs[query_var_idx].size(), PTR_VARIANT);
ref_ptrs[FLAGS][ref_ptrs[FLAGS].size()-1] |= PTR_VAR_END;
ref_ptrs[FLAGS][ref_ptrs[FLAGS].size() -
query_vars->refs[query_var_idx].size()] |= PTR_VAR_BEG;
ref_str += query_vars->refs[query_var_idx];
ref_pos += query_vars->refs[query_var_idx].size();
break;
case TYPE_SUB:
ref_ptrs[PTRS].push_back(query_str.size());
ref_ptrs[FLAGS].push_back(PTR_VARIANT|PTR_VAR_BEG|PTR_VAR_END);
query_ptrs[PTRS].push_back(ref_str.size());
query_ptrs[FLAGS].push_back(PTR_VARIANT|PTR_VAR_BEG|PTR_VAR_END);
ref_str += query_vars->refs[query_var_idx];
query_str += query_vars->alts[query_var_idx];
ref_pos++;
break;
default:
ERROR("Unexpected variant type '%d' in generate_ptrs_strs()",
query_vars->types[query_var_idx]);
}
}
query_var_idx++; // next variant

Expand Down Expand Up @@ -1987,12 +1990,14 @@ editData edits_wrapper(std::shared_ptr<superclusterData> clusterdata_ptr) {
if(false) printf("qual: %d-%d\n", prev_qual, qual-1);

// generate query string (only applying variants with Q>=qual)
std::string query = generate_str(
clusterdata_ptr->ref,
std::string query, ref;
std::vector< std::vector<int> > query_ref_ptrs, ref_query_ptrs;
generate_ptrs_strs(
query, ref, query_ref_ptrs, ref_query_ptrs,
sc->ctg_variants[QUERY][hap],
ctg, beg_idx, end_idx,
beg_idx, end_idx,
sc->begs[sc_idx], sc->ends[sc_idx],
prev_qual);
clusterdata_ptr->ref, ctg, prev_qual);

// align strings, backtrack, calculate distance
std::vector< std::vector< std::vector<uint8_t> > > ptrs(MATS);
Expand Down
2 changes: 1 addition & 1 deletion src/dist.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ void generate_ptrs_strs(
std::shared_ptr<ctgVariants> query_vars,
size_t query_clust_beg_idx, size_t ref_clust_beg_idx,
int beg_pos, int end_pos, std::shared_ptr<fastaData> ref,
const std::string & ctg
const std::string & ctg, int min_qual = 0
);

void reverse_ptrs_strs(
Expand Down
19 changes: 12 additions & 7 deletions src/edit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ void editData::write_distance() {
dists_summ = fopen(dist_summ_fn.data(), "w");
if (g.verbosity >= 1)
INFO(" Writing distance summary to '%s'", dist_summ_fn.data());
fprintf(dists_summ, "VAR_TYPE\tMIN_QUAL\tEDIT_DIST\tDISTINCT_EDITS\tED_QSCORE\tDE_QSCORE\tALN_QSCORE\n");
fprintf(dists_summ, "VAR_TYPE\tTHRESHOLD\tMIN_QUAL\tEDIT_DIST\tDISTINCT_EDITS\tED_QSCORE\tDE_QSCORE\tALN_QSCORE\n");
}
INFO(" ");
INFO("%sALIGNMENT DISTANCE SUMMARY%s", COLOR_BLUE, COLOR_WHITE);
Expand All @@ -219,16 +219,18 @@ void editData::write_distance() {

INFO(" ");
if (type == TYPE_ALL) {
INFO("%sTYPE\tMIN_QUAL\tEDIT_DIST\tDISTINCT_EDITS\tED_QSCORE\tDE_QSCORE\tALN_QSCORE%s",
INFO("%sTYPE\tTHRESHOLD\tEDIT_DIST\tDISTINCT_EDITS\tED_QSCORE\tDE_QSCORE\tALN_QSCORE%s",
COLOR_BLUE, COLOR_WHITE);
} else {
INFO("%sTYPE\tMIN_QUAL\tEDIT_DIST\tDISTINCT_EDITS\tED_QSCORE\tDE_QSCORE%s",
INFO("%sTYPE\tTHRESHOLD\tEDIT_DIST\tDISTINCT_EDITS\tED_QSCORE\tDE_QSCORE%s",
COLOR_BLUE, COLOR_WHITE);
}
std::vector<int> quals = {g.min_qual, best_qual[type], g.max_qual+1};
for (int q : quals) {
std::vector<std::string> thresholds = {"NONE", "BEST", "REF "};
for (int i = 0; i < int(quals.size()); i++) {

// fill out ED/DE for selected quals
int q = quals[i];
std::vector<int> edit_dists(TYPES, 0);
std::vector<int> distinct_edits(TYPES, 0);
for (int type = 0; type < TYPES; type++) {
Expand All @@ -241,13 +243,16 @@ void editData::write_distance() {
float all_qscore = type == TYPE_ALL ? qscore(double(this->get_score(q)) / orig_score) : 0;

// print summary
if (g.write) fprintf(dists_summ, "%s\t%d\t%d\t%d\t%f\t%f\t%f\n", type_strs2[type].data(),
if (g.write) fprintf(dists_summ, "%s\t%s\t%d\t%d\t%d\t%f\t%f\t%f\n",
type_strs2[type].data(), thresholds[i].data(),
q, edit_dists[type], distinct_edits[type], ed_qscore, de_qscore, all_qscore);
if (type == TYPE_ALL) {
INFO("%s%s\tQ >= %d\t\t%-16d%-16d%f\t%f\t%f%s", COLOR_BLUE, type_strs2[type].data(),
INFO("%s%s\t%s Q >= %d\t%-16d%-16d%f\t%f\t%f%s",
COLOR_BLUE, type_strs2[type].data(), thresholds[i].data(),
q, edit_dists[type], distinct_edits[type], ed_qscore, de_qscore, all_qscore, COLOR_WHITE);
} else {
INFO("%s%s\tQ >= %d\t\t%-16d%-16d%f\t%f%s", COLOR_BLUE, type_strs2[type].data(),
INFO("%s%s\t%s Q >= %d\t%-16d%-16d%f\t%f%s",
COLOR_BLUE, type_strs2[type].data(), thresholds[i].data(),
q, edit_dists[type], distinct_edits[type], ed_qscore, de_qscore, COLOR_WHITE);
}
}
Expand Down

0 comments on commit 7aec7a6

Please sign in to comment.