From 7b92795b2c66dd44053ecf1f76fa54147a83f1cd Mon Sep 17 00:00:00 2001 From: JW <34543031+jwcodee@users.noreply.github.com> Date: Tue, 2 Apr 2024 14:41:28 -0700 Subject: [PATCH] gr-path: add log for number of valid reads visited when making a silver path (#132) * gr-path: add log for number of valid reads visited when making a silver path * bin/goldrush: set verbose flag to on by default * goldrush_path.cpp: add space to log messages Co-authored-by: Lauren Coombe --------- Co-authored-by: Lauren Coombe --- bin/goldrush | 12 ++++++------ goldrush_path/goldrush_path.cpp | 22 +++++++++++++++++----- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/bin/goldrush b/bin/goldrush index d2dda17..20050ce 100755 --- a/bin/goldrush +++ b/bin/goldrush @@ -231,11 +231,11 @@ goldrush-path: $(p2).fa check-G check-reads clean $(p2).fa: $(p1)_all.fq ifneq ($(k), 22) - $(time) goldrush-path -k $(k) -w $(w) -t $(tile) -u $(u) -a $(a) -o $(o) -p $(p2) -i $< -h $(h) -j $(t) -P $(P) -d $(d) -x$(x) -g $(G) -b $(b) -m 0 + $(time) goldrush-path -k $(k) -w $(w) -t $(tile) -u $(u) -a $(a) -o $(o) -p $(p2) -i $< -h $(h) -j $(t) -P $(P) -d $(d) -x$(x) -g $(G) -b $(b) -m 0 --verbose else ifneq ($(w), 16) - $(time) goldrush-path -k $(k) -w $(w) -t $(tile) -u $(u) -a $(a) -o $(o) -p $(p2) -i $< -h $(h) -j $(t) -P $(P) -d $(d) -x$(x) -g $(G) -b $(b) -m 0 + $(time) goldrush-path -k $(k) -w $(w) -t $(tile) -u $(u) -a $(a) -o $(o) -p $(p2) -i $< -h $(h) -j $(t) -P $(P) -d $(d) -x$(x) -g $(G) -b $(b) -m 0 --verbose else - $(time) goldrush-path -k $(k) -w $(w) -t $(tile) -u $(u) -a $(a) -o $(o) -p $(p2) -i $< -h $(h) -j $(t) -P $(P) -d $(d) -x$(x) -s $(s) -g $(G) -b $(b) -m 0 + $(time) goldrush-path -k $(k) -w $(w) -t $(tile) -u $(u) -a $(a) -o $(o) -p $(p2) -i $< -h $(h) -j $(t) -P $(P) -d $(d) -x$(x) -s $(s) -g $(G) -b $(b) -m 0 --verbose endif echo "Done GoldRush-Path! Golden path can be found in: $@" @@ -244,11 +244,11 @@ $(p1)_all.fq: $(p1)_$(M).fq $(p1)_$(M).fq: $(long_reads) ifneq ($(k), 22) - $(time) goldrush-path -k $(k) -w $(w) -t $(tile) -u $(u) -a $(a) -o $(o) -p $(p1) -i $< -h $(h) -j $(t) -x$(x) -P $(P) -d $(d) -g $(G) -b $(b) -r $(r) --silver_path -M $(M) -m $(m) + $(time) goldrush-path -k $(k) -w $(w) -t $(tile) -u $(u) -a $(a) -o $(o) -p $(p1) -i $< -h $(h) -j $(t) -x$(x) -P $(P) -d $(d) -g $(G) -b $(b) -r $(r) --silver_path -M $(M) -m $(m) --verbose else ifneq ($(w), 16) - $(time) goldrush-path -k $(k) -w $(w) -t $(tile) -u $(u) -a $(a) -o $(o) -p $(p1) -i $< -h $(h) -j $(t) -x$(x) -P $(P) -d $(d) -g $(G) -b $(b) -r $(r) --silver_path -M $(M) -m $(m) + $(time) goldrush-path -k $(k) -w $(w) -t $(tile) -u $(u) -a $(a) -o $(o) -p $(p1) -i $< -h $(h) -j $(t) -x$(x) -P $(P) -d $(d) -g $(G) -b $(b) -r $(r) --silver_path -M $(M) -m $(m) --verbose else - $(time) goldrush-path -k $(k) -w $(w) -t $(tile) -u $(u) -a $(a) -o $(o) -p $(p1) -i $< -h $(h) -j $(t) -x$(x) -P $(P) -d $(d) -s $(s) -g $(G) -b $(b) -r $(r) --silver_path -M $(M) -m $(m) + $(time) goldrush-path -k $(k) -w $(w) -t $(tile) -u $(u) -a $(a) -o $(o) -p $(p1) -i $< -h $(h) -j $(t) -x$(x) -P $(P) -d $(d) -s $(s) -g $(G) -b $(b) -r $(r) --silver_path -M $(M) -m $(m) --verbose endif %.racon-polished.fa: %.fa.$(long_reads).sam %.fa diff --git a/goldrush_path/goldrush_path.cpp b/goldrush_path/goldrush_path.cpp index 93ed28c..62a6bc4 100644 --- a/goldrush_path/goldrush_path.cpp +++ b/goldrush_path/goldrush_path.cpp @@ -59,9 +59,14 @@ silver_path_check( uint64_t& inserted_bases, uint64_t& curr_path, uint32_t& ids_inserted, - MIBFConstructSupport& miBFCS) + MIBFConstructSupport& miBFCS, + uint32_t& valid_reads) { if (target_bases < inserted_bases) { + if (opt::verbose) { + std::cerr << "Visited " << valid_reads << " reads " << "to generate " << curr_path << " silver paths" << std::endl; + + } ++curr_path; if (opt::max_paths < curr_path) { exit(0); @@ -762,7 +767,8 @@ process_read(const btllib::SeqReader::Record& record, uint32_t& id, uint32_t& ids_inserted, const size_t min_seq_len, - const std::unordered_set& filter_out_reads) + const std::unordered_set& filter_out_reads, + uint32_t& valid_reads) { if (record.seq.size() < min_seq_len) { if (opt::debug) { @@ -855,11 +861,13 @@ process_read(const btllib::SeqReader::Record& record, inserted_bases, curr_path, ids_inserted, - miBFCS); + miBFCS, + valid_reads); } } else { if (num_assigned_tiles == num_tiles) { ++id; + ++valid_reads; if (opt::debug) { std::cerr << "complete assignment" << std::endl; } @@ -919,7 +927,8 @@ process_read(const btllib::SeqReader::Record& record, inserted_bases, curr_path, ids_inserted, - miBFCS); + miBFCS, + valid_reads); } } } @@ -931,6 +940,7 @@ process_read(const btllib::SeqReader::Record& record, // output read to wood path } ++id; + ++valid_reads; if (id % 10000 == 0) { std::cerr << "processed " << id << " reads" << std::endl; } @@ -1063,6 +1073,7 @@ main(int argc, char** argv) uint64_t inserted_bases = 0; uint64_t target_bases = opt::ratio * opt::genome_size; uint64_t curr_path = 1; + uint32_t valid_reads = 0; uint32_t id = 1; uint32_t ids_inserted = 0; // std::unordered_map id_to_num_tiles_inserted; @@ -1090,7 +1101,8 @@ main(int argc, char** argv) id, ids_inserted, opt::min_length, - filter_out_reads); + filter_out_reads, + valid_reads); } }