Skip to content

Commit

Permalink
Add ppos format-output to convertalis for count of positive substitut…
Browse files Browse the repository at this point in the history
…ion scores (#723)

Co-authored-by: dohyun-s <ts25>
  • Loading branch information
Dohyun-s authored Oct 31, 2023
1 parent fa6c093 commit 5edc79b
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/commons/Parameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ Parameters::Parameters():
PARAM_V(PARAM_V_ID, "-v", "Verbosity", "Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info", typeid(int), (void *) &verbosity, "^[0-3]{1}$", MMseqsParameter::COMMAND_COMMON),
// convertalignments
PARAM_FORMAT_MODE(PARAM_FORMAT_MODE_ID, "--format-mode", "Alignment format", "Output format:\n0: BLAST-TAB\n1: SAM\n2: BLAST-TAB + query/db length\n3: Pretty HTML\n4: BLAST-TAB + column headers\nBLAST-TAB (0) and BLAST-TAB + column headers (4) support custom output formats (--format-output)", typeid(int), (void *) &formatAlignmentMode, "^[0-4]{1}$"),
PARAM_FORMAT_OUTPUT(PARAM_FORMAT_OUTPUT_ID, "--format-output", "Format alignment output", "Choose comma separated list of output columns from: query,target,evalue,gapopen,pident,fident,nident,qstart,qend,qlen\ntstart,tend,tlen,alnlen,raw,bits,cigar,qseq,tseq,qheader,theader,qaln,taln,qframe,tframe,mismatch,qcov,tcov\nqset,qsetid,tset,tsetid,taxid,taxname,taxlineage,qorfstart,qorfend,torfstart,torfend", typeid(std::string), (void *) &outfmt, ""),
PARAM_FORMAT_OUTPUT(PARAM_FORMAT_OUTPUT_ID, "--format-output", "Format alignment output", "Choose comma separated list of output columns from: query,target,evalue,gapopen,pident,fident,nident,qstart,qend,qlen\ntstart,tend,tlen,alnlen,raw,bits,cigar,qseq,tseq,qheader,theader,qaln,taln,qframe,tframe,mismatch,qcov,tcov\nqset,qsetid,tset,tsetid,taxid,taxname,taxlineage,qorfstart,qorfend,torfstart,torfend,ppos", typeid(std::string), (void *) &outfmt, ""),
PARAM_DB_OUTPUT(PARAM_DB_OUTPUT_ID, "--db-output", "Database output", "Return a result DB instead of a text file", typeid(bool), (void *) &dbOut, "", MMseqsParameter::COMMAND_EXPERT),
// --include-only-extendablediagonal
PARAM_RESCORE_MODE(PARAM_RESCORE_MODE_ID, "--rescore-mode", "Rescore mode", "Rescore diagonals with:\n0: Hamming distance\n1: local alignment (score only)\n2: local alignment\n3: global alignment\n4: longest alignment fulfilling window quality criterion", typeid(int), (void *) &rescoreMode, "^[0-4]{1}$"),
Expand Down Expand Up @@ -2828,6 +2828,7 @@ std::vector<int> Parameters::getOutputFormat(int formatMode, const std::string &
else if (outformatSplit[i].compare("qorfend") == 0){ code = Parameters::OUTFMT_QORFEND;}
else if (outformatSplit[i].compare("torfstart") == 0){ code = Parameters::OUTFMT_TORFSTART;}
else if (outformatSplit[i].compare("torfend") == 0){ code = Parameters::OUTFMT_TORFEND;}
else if (outformatSplit[i].compare("ppos") == 0){ needSequences = true; needBacktrace = true; code = Parameters::OUTFMT_PPOS;}
else if (outformatSplit[i].compare("empty") == 0){ code = Parameters::OUTFMT_EMPTY;}
else {
Debug(Debug::ERROR) << "Format code " << outformatSplit[i] << " does not exist.";
Expand Down
1 change: 1 addition & 0 deletions src/commons/Parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ class Parameters {
static const int OUTFMT_TORFSTART = 37;
static const int OUTFMT_TORFEND = 38;
static const int OUTFMT_FIDENT = 39;
static const int OUTFMT_PPOS = 40;

static const int INDEX_SUBSET_NORMAL = 0;
static const int INDEX_SUBSET_NO_HEADERS = 1;
Expand Down
30 changes: 30 additions & 0 deletions src/util/convertalignments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,36 @@ int convertalignments(int argc, const char **argv, const Command &command) {
case Parameters::OUTFMT_TORFEND:
result.append(SSTR(res.dbOrfEndPos));
break;
case Parameters::OUTFMT_PPOS: {
float pPositive = 0;
int matchCount = 0;
if (res.backtrace.empty() == false) {
int qPos = 0;
int tPos = 0;
for (size_t pos = 0; pos < res.backtrace.size(); pos++) {
switch (res.backtrace[pos]) {
case 'M': {
char qRes = queryProfile ? queryProfData[qPos] : querySeqData[qPos];
char tRes = targetProfile ? targetProfData[tPos] : targetSeqData[tPos];
pPositive += (subMat->subMatrix[subMat->aa2num[(int)qRes]][subMat->aa2num[(int)tRes]] > 0);
matchCount += 1;
qPos++;
tPos++;
break;
}
case 'D':
qPos++;
break;
case 'I':
tPos++;
break;
}
}
pPositive /= matchCount;
}
result.append(SSTR(pPositive));
break;
}
}
if (i < outcodes.size() - 1) {
result.push_back('\t');
Expand Down

0 comments on commit 5edc79b

Please sign in to comment.