Skip to content

Commit

Permalink
Move the long-reference warning behind the --expectTranscriptome flag
Browse files: browse the repository at this point in the history
  • Loading branch information
Rob Patro committed Aug 25, 2020
1 parent 82c2390 commit 781ce48
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 4 deletions.
1 change: 1 addition & 0 deletions include/ProgOpts.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class IndexOptions {
bool buildEdgeVec{false};
bool buildEqCls{false};
bool featuresRef{false};
bool expect_transcriptome{false};
std::string twopaco_tmp_dir{""};
};

Expand Down
11 changes: 7 additions & 4 deletions src/FixFasta.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ bool fixFasta(single_parser* parser,
// std::string& outputDir,
spp::sparse_hash_set<std::string>& decoyNames,
bool keepDuplicates, uint32_t k,
std::string& sepStr, std::mutex& iomutex,
std::string& sepStr, bool expect_transcriptome, std::mutex& iomutex,
std::shared_ptr<spdlog::logger> log, std::string outFile,
std::vector<uint32_t>& refIdExtensions,
std::vector<std::pair<std::string, uint16_t>>& shortRefs) {
Expand Down Expand Up @@ -222,7 +222,7 @@ bool fixFasta(single_parser* parser,

// If we're suspicious the user has fed in a *genome* rather
// than a transcriptome, say so here.
if (readStr.size() >= tooLong and !isDecoy) {
if (readStr.size() >= tooLong and !isDecoy and expect_transcriptome) {
log->warn("Entry with header [{}] was longer than {} nucleotides. "
"This is probably a chromosome instead of a transcript.",
read.name, tooLong);
Expand Down Expand Up @@ -624,6 +624,7 @@ int fixFastaMain(std::vector<std::string>& args,
std::string decoyFile;
bool keepDuplicates{false};
bool printHelp{false};
bool expect_transcriptome{false};
std::string sepStr{" \t"};

auto cli = (
Expand All @@ -634,6 +635,8 @@ int fixFastaMain(std::vector<std::string>& args,
"Instead of a space or tab, break the header at the first "
"occurrence of this string, and name the transcript as the token before "
"the first separator (default = space & tab)",
option("--expectTranscriptome").set(expect_transcriptome) %
"expect (non-decoy) sequences to be transcripts rather than genomic contigs",
option("--decoys", "-d") & value("decoys", decoyFile) %
"Treat these sequences as decoys that may be sequence-similar to some known indexed reference",
option("--keepDuplicates").set(keepDuplicates) % "Retain duplicate references in the input",
Expand Down Expand Up @@ -675,8 +678,8 @@ int fixFastaMain(std::vector<std::string>& args,
transcriptParserPtr.reset(new single_parser(refFiles, numThreads, numProd));
transcriptParserPtr->start();
std::mutex iomutex;
fix_ok = fixFasta(transcriptParserPtr.get(), decoyNames, keepDuplicates, k, sepStr, iomutex, log,
outFile, refIdExtension, shortRefs);
fix_ok = fixFasta(transcriptParserPtr.get(), decoyNames, keepDuplicates, k, sepStr, expect_transcriptome,
iomutex, log, outFile, refIdExtension, shortRefs);
transcriptParserPtr->stop();
}

Expand Down
2 changes: 2 additions & 0 deletions src/Pufferfish.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ int main(int argc, char* argv[]) {
(required("-r", "--ref") & values(ensure_file_exists, "ref_file", indexOpt.rfile)) % "path to the reference fasta file",
(required("-o", "--output") & value("output_dir", indexOpt.outdir)) % "directory where index is written",
//(required("-g", "--gfa") & value("gfa_file", indexOpt.gfa_file)) % "path to the GFA file",
(option("--expectTranscriptome").set(indexOpt.expect_transcriptome) %
"expect (non-decoy) sequences to be transcripts rather than genomic contigs"),
(option("--headerSep") & value("sep_strs", indexOpt.header_sep)) %
"Instead of a space or tab, break the header at the first "
"occurrence of this string, and name the transcript as the token before "
Expand Down
3 changes: 3 additions & 0 deletions src/PufferfishIndexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,9 @@ int pufferfishIndex(pufferfish::IndexOptions& indexOpts) {
if (indexOpts.keep_duplicates) {
args.push_back("--keepDuplicates");
}
if (indexOpts.expect_transcriptome) {
args.push_back("--expectTranscriptome");
}
args.push_back("--klen");
args.push_back(std::to_string(k));
args.push_back("--input");
Expand Down

2 comments on commit 781ce48

@rob-p
Copy link
Contributor

@rob-p rob-p commented on 781ce48 Aug 25, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@fataltes — please review this commit and see if you agree that it addresses #20 adequately.

@fataltes
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks,
I think it covers the issue.

Please sign in to comment.