diff --git a/src/topp/IDFilter.cpp b/src/topp/IDFilter.cpp index 8952bc8cdc4..49befb7623a 100644 --- a/src/topp/IDFilter.cpp +++ b/src/topp/IDFilter.cpp @@ -221,6 +221,8 @@ class TOPPIDFilter : setMinInt_("best:n_spectra", 0); registerIntOption_("best:n_peptide_hits", "", 0, "Keep only the 'n' highest scoring peptide hits per spectrum (for n > 0).", false); setMinInt_("best:n_peptide_hits", 0); + registerStringOption_("best:spectrum_per_peptide", "", "false", "Keep one spectrum per peptide. Value determines if same sequence but different charges or modifications are treated as separate peptides or the same peptide. (default: false = filter disabled).", false); + setValidStrings_("best:spectrum_per_peptide", {"false", "sequence", "sequence+charge", "sequence+modification", "sequence+charge+modification"}); registerIntOption_("best:n_protein_hits", "", 0, "Keep only the 'n' highest scoring protein hits (for n > 0).", false); setMinInt_("best:n_protein_hits", 0); registerFlag_("best:strict", "Keep only the highest scoring peptide hit.\n" @@ -607,6 +609,25 @@ class TOPPIDFilter : IDFilter::keepNBestHits(peptides, best_n_pep); } + String spectrum_per_peptide = getStringOption_("best:spectrum_per_peptide"); + if (spectrum_per_peptide != "false") + { + OPENMS_LOG_INFO << "Keeping best spectrum per " << spectrum_per_peptide << endl; + if (spectrum_per_peptide == "sequence") // group by sequence and return best spectrum (->smallest number of spectra) + { + IDFilter::keepBestPerPeptide(peptides, true, true, 1); + } else if (spectrum_per_peptide == "sequence+modification") + { + IDFilter::keepBestPerPeptide(peptides, false, true, 1); + } else if (spectrum_per_peptide == "sequence+charge") + { + IDFilter::keepBestPerPeptide(peptides, true, false, 1); + } else if (spectrum_per_peptide == "sequence+charge+modification") // group by sequence, modification, charge combination and return best spectrum (->largest number of spectra) + { + IDFilter::keepBestPerPeptide(peptides, 
false, false, 1); + } + } + Int min_rank = 0, max_rank = 0; if (parseRange_(getStringOption_("best:n_to_m_peptide_hits"), min_rank, max_rank))