diff --git a/src/dist.cpp b/src/dist.cpp index 3c7310c..2c8cc78 100644 --- a/src/dist.cpp +++ b/src/dist.cpp @@ -149,7 +149,8 @@ void generate_ptrs_strs( std::shared_ptr query_vars, size_t query_clust_beg_idx, size_t query_clust_end_idx, int beg_pos, int end_pos, - std::shared_ptr ref, const std::string & ctg + std::shared_ptr ref, const std::string & ctg, + int min_qual /* = 0 */ ) { // generate query and ref strings and pointers @@ -163,40 +164,42 @@ void generate_ptrs_strs( if (query_var_idx < query_end_idx && ref_pos == query_vars->poss[query_var_idx]) { // start query variant - switch (query_vars->types[query_var_idx]) { - case TYPE_INS: - query_ptrs[PTRS].insert(query_ptrs[PTRS].end(), - query_vars->alts[query_var_idx].size(), ref_str.size()-1); - query_ptrs[FLAGS].insert(query_ptrs[FLAGS].end(), - query_vars->alts[query_var_idx].size(), PTR_VARIANT); - query_ptrs[FLAGS][query_ptrs[FLAGS].size()-1] |= PTR_VAR_END; - query_ptrs[FLAGS][query_ptrs[FLAGS].size() - - query_vars->alts[query_var_idx].size()] |= PTR_VAR_BEG | PTR_INS_LOC; - query_str += query_vars->alts[query_var_idx]; - break; - case TYPE_DEL: - ref_ptrs[PTRS].insert(ref_ptrs[PTRS].end(), - query_vars->refs[query_var_idx].size(), query_str.size()-1); - ref_ptrs[FLAGS].insert(ref_ptrs[FLAGS].end(), - query_vars->refs[query_var_idx].size(), PTR_VARIANT); - ref_ptrs[FLAGS][ref_ptrs[FLAGS].size()-1] |= PTR_VAR_END; - ref_ptrs[FLAGS][ref_ptrs[FLAGS].size() - - query_vars->refs[query_var_idx].size()] |= PTR_VAR_BEG; - ref_str += query_vars->refs[query_var_idx]; - ref_pos += query_vars->refs[query_var_idx].size(); - break; - case TYPE_SUB: - ref_ptrs[PTRS].push_back(query_str.size()); - ref_ptrs[FLAGS].push_back(PTR_VARIANT|PTR_VAR_BEG|PTR_VAR_END); - query_ptrs[PTRS].push_back(ref_str.size()); - query_ptrs[FLAGS].push_back(PTR_VARIANT|PTR_VAR_BEG|PTR_VAR_END); - ref_str += query_vars->refs[query_var_idx]; - query_str += query_vars->alts[query_var_idx]; - ref_pos++; - break; - default: - ERROR("Unexpected variant type '%d' in generate_ptrs_strs()", - query_vars->types[query_var_idx]); + if (query_vars->var_quals[query_var_idx] >= min_qual) { + switch (query_vars->types[query_var_idx]) { + case TYPE_INS: + query_ptrs[PTRS].insert(query_ptrs[PTRS].end(), + query_vars->alts[query_var_idx].size(), ref_str.size()-1); + query_ptrs[FLAGS].insert(query_ptrs[FLAGS].end(), + query_vars->alts[query_var_idx].size(), PTR_VARIANT); + query_ptrs[FLAGS][query_ptrs[FLAGS].size()-1] |= PTR_VAR_END; + query_ptrs[FLAGS][query_ptrs[FLAGS].size() - + query_vars->alts[query_var_idx].size()] |= PTR_VAR_BEG | PTR_INS_LOC; + query_str += query_vars->alts[query_var_idx]; + break; + case TYPE_DEL: + ref_ptrs[PTRS].insert(ref_ptrs[PTRS].end(), + query_vars->refs[query_var_idx].size(), query_str.size()-1); + ref_ptrs[FLAGS].insert(ref_ptrs[FLAGS].end(), + query_vars->refs[query_var_idx].size(), PTR_VARIANT); + ref_ptrs[FLAGS][ref_ptrs[FLAGS].size()-1] |= PTR_VAR_END; + ref_ptrs[FLAGS][ref_ptrs[FLAGS].size() - + query_vars->refs[query_var_idx].size()] |= PTR_VAR_BEG; + ref_str += query_vars->refs[query_var_idx]; + ref_pos += query_vars->refs[query_var_idx].size(); + break; + case TYPE_SUB: + ref_ptrs[PTRS].push_back(query_str.size()); + ref_ptrs[FLAGS].push_back(PTR_VARIANT|PTR_VAR_BEG|PTR_VAR_END); + query_ptrs[PTRS].push_back(ref_str.size()); + query_ptrs[FLAGS].push_back(PTR_VARIANT|PTR_VAR_BEG|PTR_VAR_END); + ref_str += query_vars->refs[query_var_idx]; + query_str += query_vars->alts[query_var_idx]; + ref_pos++; + break; + default: + ERROR("Unexpected variant type '%d' in generate_ptrs_strs()", + query_vars->types[query_var_idx]); + } } query_var_idx++; // next variant @@ -1987,12 +1990,14 @@ editData edits_wrapper(std::shared_ptr clusterdata_ptr) { if(false) printf("qual: %d-%d\n", prev_qual, qual-1); // generate query string (only applying variants with Q>=qual) - std::string query = generate_str( - clusterdata_ptr->ref, + std::string query, ref; + std::vector< std::vector > query_ref_ptrs, ref_query_ptrs; + generate_ptrs_strs( + query, ref, query_ref_ptrs, ref_query_ptrs, sc->ctg_variants[QUERY][hap], - ctg, beg_idx, end_idx, + beg_idx, end_idx, sc->begs[sc_idx], sc->ends[sc_idx], - prev_qual); + clusterdata_ptr->ref, ctg, prev_qual); // align strings, backtrack, calculate distance std::vector< std::vector< std::vector > > ptrs(MATS); diff --git a/src/dist.h b/src/dist.h index bca86fe..99528ed 100644 --- a/src/dist.h +++ b/src/dist.h @@ -103,7 +103,7 @@ void generate_ptrs_strs( std::shared_ptr query_vars, size_t query_clust_beg_idx, size_t ref_clust_beg_idx, int beg_pos, int end_pos, std::shared_ptr ref, - const std::string & ctg + const std::string & ctg, int min_qual = 0 ); void reverse_ptrs_strs( diff --git a/src/edit.cpp b/src/edit.cpp index 2e56ced..275aa2d 100644 --- a/src/edit.cpp +++ b/src/edit.cpp @@ -205,7 +205,7 @@ void editData::write_distance() { dists_summ = fopen(dist_summ_fn.data(), "w"); if (g.verbosity >= 1) INFO(" Writing distance summary to '%s'", dist_summ_fn.data()); - fprintf(dists_summ, "VAR_TYPE\tMIN_QUAL\tEDIT_DIST\tDISTINCT_EDITS\tED_QSCORE\tDE_QSCORE\tALN_QSCORE\n"); + fprintf(dists_summ, "VAR_TYPE\tTHRESHOLD\tMIN_QUAL\tEDIT_DIST\tDISTINCT_EDITS\tED_QSCORE\tDE_QSCORE\tALN_QSCORE\n"); } INFO(" "); INFO("%sALIGNMENT DISTANCE SUMMARY%s", COLOR_BLUE, COLOR_WHITE); @@ -219,16 +219,18 @@ void editData::write_distance() { INFO(" "); if (type == TYPE_ALL) { - INFO("%sTYPE\tMIN_QUAL\tEDIT_DIST\tDISTINCT_EDITS\tED_QSCORE\tDE_QSCORE\tALN_QSCORE%s", + INFO("%sTYPE\tTHRESHOLD\tEDIT_DIST\tDISTINCT_EDITS\tED_QSCORE\tDE_QSCORE\tALN_QSCORE%s", COLOR_BLUE, COLOR_WHITE); } else { - INFO("%sTYPE\tMIN_QUAL\tEDIT_DIST\tDISTINCT_EDITS\tED_QSCORE\tDE_QSCORE%s", + INFO("%sTYPE\tTHRESHOLD\tEDIT_DIST\tDISTINCT_EDITS\tED_QSCORE\tDE_QSCORE%s", COLOR_BLUE, COLOR_WHITE); } std::vector quals = {g.min_qual, best_qual[type], g.max_qual+1}; - for (int q : quals) { + std::vector thresholds = {"NONE", "BEST", "REF "}; + for (int i = 0; i < int(quals.size()); i++) { // fill out ED/DE for selected quals + int q = quals[i]; std::vector edit_dists(TYPES, 0); std::vector distinct_edits(TYPES, 0); for (int type = 0; type < TYPES; type++) { @@ -241,13 +243,16 @@ void editData::write_distance() { float all_qscore = type == TYPE_ALL ? qscore(double(this->get_score(q)) / orig_score) : 0; // print summary - if (g.write) fprintf(dists_summ, "%s\t%d\t%d\t%d\t%f\t%f\t%f\n", type_strs2[type].data(), + if (g.write) fprintf(dists_summ, "%s\t%s\t%d\t%d\t%d\t%f\t%f\t%f\n", + type_strs2[type].data(), thresholds[i].data(), q, edit_dists[type], distinct_edits[type], ed_qscore, de_qscore, all_qscore); if (type == TYPE_ALL) { - INFO("%s%s\tQ >= %d\t\t%-16d%-16d%f\t%f\t%f%s", COLOR_BLUE, type_strs2[type].data(), + INFO("%s%s\t%s Q >= %d\t%-16d%-16d%f\t%f\t%f%s", + COLOR_BLUE, type_strs2[type].data(), thresholds[i].data(), q, edit_dists[type], distinct_edits[type], ed_qscore, de_qscore, all_qscore, COLOR_WHITE); } else { - INFO("%s%s\tQ >= %d\t\t%-16d%-16d%f\t%f%s", COLOR_BLUE, type_strs2[type].data(), + INFO("%s%s\t%s Q >= %d\t%-16d%-16d%f\t%f%s", + COLOR_BLUE, type_strs2[type].data(), thresholds[i].data(), q, edit_dists[type], distinct_edits[type], ed_qscore, de_qscore, COLOR_WHITE); } }