Skip to content

Commit

Permalink
Smaller bug fixes: INVDUP, better support for smaller indel
Browse files Browse the repository at this point in the history
  • Loading branch information
fritzsedlazeck committed Apr 7, 2017
1 parent 4a6246b commit 813f669
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 41 deletions.
30 changes: 21 additions & 9 deletions src/Alignment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,12 @@ vector<differences_str> Alignment::summarizeAlignment() {
vector<differences_str> events;
int pos = this->getPosition();
differences_str ev;
bool flag = false; // (strcmp(this->getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0);
bool flag = (strcmp(this->getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0);

for (size_t i = 0; i < al->CigarData.size(); i++) {
if(flag){
// cout<<al->CigarData[i].Type<<al->CigarData[i].Length<<std::endl;
}
if (al->CigarData[i].Type == 'D') {
ev.position = pos;
ev.type = al->CigarData[i].Length; //deletion
Expand All @@ -88,7 +91,7 @@ vector<differences_str> Alignment::summarizeAlignment() {
pos += al->CigarData[i].Length;
} else if (al->CigarData[i].Type == 'N') {
pos += al->CigarData[i].Length;
}else if (al->CigarData[i].Type == 'S' && al->CigarData[i].Length > Parameter::Instance()->huge_ins) { /// Used for reads ranging into an inser
} else if (al->CigarData[i].Type == 'S' && al->CigarData[i].Length > Parameter::Instance()->huge_ins) { /// Used for reads ranging into an inser
string sa;
al->GetTag("SA", sa);
uint32_t sv;
Expand All @@ -108,8 +111,9 @@ vector<differences_str> Alignment::summarizeAlignment() {
}

if (flag) {
std::cout<<"FIRST:"<<std::endl;
for (size_t i = 0; i < events.size(); i++) {
if (abs(events[i].type) > 3000) {
if (abs(events[i].type) > 200) {
cout << events[i].position << " " << events[i].type << endl;
}
}
Expand Down Expand Up @@ -158,8 +162,9 @@ vector<differences_str> Alignment::summarizeAlignment() {
}

if (flag) {
std::cout<<"SECOND:"<<std::endl;
for (size_t i = 0; i < events.size(); i++) {
if (abs(events[i].type) > 3000) {
if (abs(events[i].type) > 200) {
cout << events[i].position << " " << events[i].type << endl;
}
}
Expand All @@ -170,7 +175,7 @@ vector<differences_str> Alignment::summarizeAlignment() {
// comp_aln = clock();
size_t i = 0;
//to erase stretches of consecutive mismatches == N in the ref
int break_point = 0;
/*int break_point = 0;
while (i < events.size()) {
if (events[i].position > max_size) {
while (i < events.size()) {
Expand All @@ -195,18 +200,19 @@ vector<differences_str> Alignment::summarizeAlignment() {
} else {
i++;
}
}
}*/
// Parameter::Instance()->meassure_time(comp_aln, "\t\terrase N: ");
if (flag) {
cout << "LAST:" << endl;
for (size_t i = 0; i < events.size(); i++) {
if (abs(events[i].type) > 3000) {
if (abs(events[i].type) > 200) {
cout << events[i].position << " " << events[i].type << endl;
}
}
cout << endl;
}

cout << "total: "<< events.size()<< endl;
}
return events;
}
void Alignment::computeAlignment() {
Expand Down Expand Up @@ -1082,20 +1088,26 @@ vector<int> Alignment::get_avg_diff(double & dist) {

vector<str_event> Alignment::get_events_Aln() {

bool flag = (strcmp(this->getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0);

//clock_t comp_aln = clock();
vector<differences_str> event_aln = summarizeAlignment();
if(flag){
std::cout<<"test size: "<<event_aln.size()<<std::endl;
}
//double time2 = Parameter::Instance()->meassure_time(comp_aln, "\tcompAln Events: ");

vector<str_event> events;
PlaneSweep_slim * plane = new PlaneSweep_slim();
vector<pair_str> profile;
// comp_aln = clock();

bool flag = (strcmp(this->getName().c_str(), Parameter::Instance()->read_name.c_str()) == 0);


int noise_events = 0;
//compute the profile of differences:
for (size_t i = 0; i < event_aln.size(); i++) {

pair_str tmp;
tmp.position = -1;
if (event_aln[i].type == 0) {
Expand Down
1 change: 1 addition & 0 deletions src/Paramer.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class Parameter {
int min_grouping_support; //min num reads supporting the overlap of two SVs
int huge_ins;
int max_dist_alns;
int min_segment_size;

// bool splitthreader_output;
bool debug;
Expand Down
14 changes: 9 additions & 5 deletions src/Sniffles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ void read_parameters(int argc, char *argv[]) {
TCLAP::ValueArg<int> arg_minlength("l", "min_length", "Minimum length of SV to be reported. Default: 30", false, 30, "int");
TCLAP::ValueArg<int> arg_mq("q", "minmapping_qual", "Minimum Mapping Quality. Default: 20", false, 20, "int");
TCLAP::ValueArg<int> arg_numreads("n", "num_reads_report", "Report up to N reads that support the SV in the vcf file. Default: 0", false, 0, "int");
TCLAP::ValueArg<int> arg_segsize("r","min_seq_size","Discard read if non of its segment is larger then this. Default: 2kb",false,2000,"int");
TCLAP::ValueArg<std::string> arg_tmp_file("", "tmp_file", "path to temporary file otherwise Sniffles will use the current directory.", false, "", "string");
TCLAP::SwitchArg arg_genotype("", "genotype", "Enables Sniffles to compute the genotypes.", cmd, false);
TCLAP::SwitchArg arg_cluster("", "cluster", "Enables Sniffles to phase SVs that occur on the same reads", cmd, false);
Expand All @@ -53,6 +54,7 @@ void read_parameters(int argc, char *argv[]) {

cmd.add(arg_cluster_supp);
cmd.add(arg_numreads);
cmd.add(arg_segsize);
cmd.add(arg_tmp_file);
cmd.add(arg_dist);
cmd.add(arg_threads);
Expand All @@ -64,12 +66,13 @@ void read_parameters(int argc, char *argv[]) {
cmd.add(arg_allelefreq);
cmd.add(arg_support);
cmd.add(arg_bamfile);

//parse cmd:
cmd.parse(argc, argv);

Parameter::Instance()->debug = true;
Parameter::Instance()->score_treshold = 10;
Parameter::Instance()->read_name = " "; //"22_36746138"; //just for debuging reasons!
Parameter::Instance()->read_name = "m151104_233737_42291_c100924012550000001823194105121673_s1_p0/122462/0_24344";//m151102_123142_42286_c100922632550000001823194205121665_s1_p0/80643/0_20394"; //"22_36746138"; //just for debuging reasons!
Parameter::Instance()->bam_files.push_back(arg_bamfile.getValue());
Parameter::Instance()->min_mq = arg_mq.getValue();
Parameter::Instance()->output_vcf = arg_vcf.getValue();
Expand All @@ -85,7 +88,7 @@ void read_parameters(int argc, char *argv[]) {
Parameter::Instance()->tmp_file = arg_tmp_file.getValue();
Parameter::Instance()->min_grouping_support = arg_cluster_supp.getValue();
Parameter::Instance()->min_allelel_frequency = arg_allelefreq.getValue();

Parameter::Instance()->min_segment_size = arg_segsize.getValue();
if (Parameter::Instance()->min_allelel_frequency > 0) {
std::cerr << "Automatically enabling genotype mode" << std::endl;
Parameter::Instance()->genotype = true;
Expand All @@ -94,9 +97,10 @@ void read_parameters(int argc, char *argv[]) {
if (Parameter::Instance()->tmp_file.empty()) {
std::stringstream ss;
srand(time(NULL));
ss << rand();
sleep(5);
ss << rand();
//ss << rand();
//sleep(5);
//ss << rand();
ss << arg_bamfile.getValue();
ss << "_tmp";
Parameter::Instance()->tmp_file = ss.str(); //check if file exists! -> if yes throw the dice again
}
Expand Down
3 changes: 2 additions & 1 deletion src/print/IPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include "IPrinter.h"


bool IPrinter::to_print(Breakpoint * &SV, pair<double, double>& std, pair<double, double> & kurtosis, double & std_length) {

std.first = 0;
Expand All @@ -25,7 +26,7 @@ bool IPrinter::to_print(Breakpoint * &SV, pair<double, double>& std, pair<double
double dist = (double) (SV->get_coordinates().stop.most_support - SV->get_coordinates().start.most_support);
//dist = (double)std::min(((int)dist * 4), Parameter::Instance()->max_dist);
dist = dist * 4.0 * (uniform_variance / 2); //because we test against corrected value!
//std::cout<<"DIST: "<<(SV->get_coordinates().stop.most_support - SV->get_coordinates().start.most_support)<<" "<<dist<<" STD: "<<std_start<<" "<<std_stop<<std::endl;
// std::cout<<"DIST: "<<(SV->get_coordinates().stop.most_support - SV->get_coordinates().start.most_support)<<" "<<dist<<" STD: "<<std.first<<" "<<std.second<<std::endl;
return ((std.first < dist && std.second < dist)); //0.2886751
}

Expand Down
51 changes: 28 additions & 23 deletions src/sub/Breakpoint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,13 +122,13 @@ long Breakpoint::overlap(Breakpoint * tmp) {
}
//merging two robust calls:
/*if (is_same_strand(tmp) && (abs(tmp->get_coordinates().start.min_pos - positions.start.min_pos) < max_dist || abs(tmp->get_coordinates().stop.max_pos - positions.stop.max_pos) < max_dist)) {
if (tmp->get_coordinates().stop.max_pos - tmp->get_coordinates().start.min_pos == Parameter::Instance()->huge_ins || positions.stop.max_pos - positions.start.min_pos == Parameter::Instance()->huge_ins) {
if (flag) {
cout << "\tHIT" << endl;
}
return 0;
}
}*/
if (tmp->get_coordinates().stop.max_pos - tmp->get_coordinates().start.min_pos == Parameter::Instance()->huge_ins || positions.stop.max_pos - positions.start.min_pos == Parameter::Instance()->huge_ins) {
if (flag) {
cout << "\tHIT" << endl;
}
return 0;
}
}*/

if (is_same_strand(tmp) && (abs(tmp->get_coordinates().start.min_pos - positions.start.min_pos) < max_dist && abs(tmp->get_coordinates().stop.max_pos - positions.stop.max_pos) < max_dist)) {
if (flag) {
Expand Down Expand Up @@ -349,30 +349,35 @@ void Breakpoint::predict_SV() {

for (std::map<std::string, read_str>::iterator i = positions.support.begin(); i != positions.support.end(); i++) {
if ((*i).second.SV & this->sv_type) { // && !((*i).second.SV & INS && (*i).second.length==Parameter::Instance()->huge_ins)) { ///check type
//cout << "Hit" << endl;

if ((*i).second.coordinates.first != -1) {
if (starts.find((*i).second.coordinates.first) == starts.end()) {
starts[(*i).second.coordinates.first] = 1;
} else {
starts[(*i).second.coordinates.first]++;
if ((*i).second.length != Parameter::Instance()->huge_ins) {
if (starts.find((*i).second.coordinates.first) == starts.end()) {
starts[(*i).second.coordinates.first] = 1;
} else {
starts[(*i).second.coordinates.first]++;
}
}
start2.push_back((*i).second.coordinates.first);
}
if ((*i).second.coordinates.second != -1) { //TODO test
if (stops.find((*i).second.coordinates.second) == stops.end()) {
stops[(*i).second.coordinates.second] = 1;
} else {
stops[(*i).second.coordinates.second]++;
if ((*i).second.length != Parameter::Instance()->huge_ins) {
if (stops.find((*i).second.coordinates.second) == stops.end()) {
stops[(*i).second.coordinates.second] = 1;
} else {
stops[(*i).second.coordinates.second]++;
}
}
stops2.push_back((*i).second.coordinates.second);
}
if ((*i).second.SV & INS) { //check lenght for ins only!
// std::cout<<"LENGTH 1st: "<<(*i).second.length<<" "<<(*i).first<<std::endl;
if (lengths.find((*i).second.length) == lengths.end()) {
lengths[(*i).second.length] = 1;
} else {

lengths[(*i).second.length]++;
if (((*i).second.SV & INS) ) { //check lenght for ins only!
if ((*i).second.length != Parameter::Instance()->huge_ins) {
if (lengths.find((*i).second.length) == lengths.end()) {
lengths[(*i).second.length] = 1;
} else {

lengths[(*i).second.length]++;
}
}
lengths2.push_back((*i).second.length);
}
Expand Down
11 changes: 8 additions & 3 deletions src/sub/Detect_Breakpoints.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ long fuck_off(long pos, RefVector ref, std::string &chr) {
chr = ref[i].RefName;
return pos + ref[i].RefLength + (long) Parameter::Instance()->max_dist;
}


void store_pos(vector<hist_str> &positions, long pos, std::string read_name) {
for (size_t i = 0; i < positions.size(); i++) {
if (abs(positions[i].position - pos) < Parameter::Instance()->min_length) {
Expand Down Expand Up @@ -424,7 +426,7 @@ void add_events(Alignment *& tmp, std::vector<str_event> events, short type, lon
Breakpoint * point = new Breakpoint(svs, events[i].length);
bst.insert(point, root);
//std::cout<<"Print:"<<std::endl;
//bst.print(root);
// bst.print(root);
}
}

Expand All @@ -442,6 +444,7 @@ void add_splits(Alignment *& tmp, std::vector<aln_str> events, short type, RefVe
}
}
}

for (size_t i = 1; i < events.size(); i++) {
position_str svs;
//position_str stop;
Expand Down Expand Up @@ -505,7 +508,9 @@ void add_splits(Alignment *& tmp, std::vector<aln_str> events, short type, RefVe

read.strand.first = events[i - 1].strand;
read.strand.second = !events[i].strand;
if (overlaps(events[i - 1], events[i])) {

bool is_overlapping=overlaps(events[i - 1], events[i]) ;
if (is_overlapping&& (events[i - 1].length > Parameter::Instance()->min_segment_size ||events[i].length > Parameter::Instance()->min_segment_size )) {
if (flag) {
std::cout << "Overlap curr: " << events[i].pos << " " << events[i].pos + events[i].length << " prev: " << events[i - 1].pos << " " << events[i - 1].pos + events[i - 1].length << " " << tmp->getName() << std::endl;
}
Expand All @@ -528,7 +533,7 @@ void add_splits(Alignment *& tmp, std::vector<aln_str> events, short type, RefVe
// std::cout<<"NEST: "<<svs.start.min_pos- get_ref_lengths(events[i - 1].RefID, ref) << " "<<svs.stop.max_pos - get_ref_lengths(events[i - 1].RefID, ref)<<" "<<tmp->getName()<<std::endl;
// }
// svs.stop.max_pos = svs.start.min_pos + Parameter::Instance()->min_length * 2;
} else {
} else if(!is_overlapping){
read.SV |= INV;
if (events[i - 1].strand) {
svs.start.min_pos = events[i - 1].pos + events[i - 1].length + get_ref_lengths(events[i - 1].RefID, ref);
Expand Down

0 comments on commit 813f669

Please sign in to comment.