diff --git a/.github/workflows/knime_tests.yml b/.github/workflows/knime_tests.yml index 41b2fc199a5..7136a88cce9 100644 --- a/.github/workflows/knime_tests.yml +++ b/.github/workflows/knime_tests.yml @@ -1,13 +1,17 @@ name: 'Test KNIME workflows' on: workflow_dispatch: - + inputs: + updateURL: + type: string + description: use a specific location for the knime update site + default: https://abibuilder.cs.uni-tuebingen.de/archive/openms/knime-plugin/updateSite/nightly/ jobs: test-knime: env: GH_TOKEN: ${{ github.token }} - KNIME_MAJOR_VERSION: 4 - KNIME_MINOR_VERSION: 7 + KNIME_MAJOR_VERSION: 5 + KNIME_MINOR_VERSION: 3 INSTALLATION_DIR: ${{ github.workspace }} runs-on: ubuntu-latest steps: @@ -37,7 +41,7 @@ jobs: - name: Install OpenMS plugin run: | "$KNIME_DIR/knime" -application org.eclipse.equinox.p2.director \ - -r "http://update.knime.com/analytics-platform/${KNIME_VERSION},https://abibuilder.cs.uni-tuebingen.de/archive/openms/knime-plugin/updateSite/nightly/" \ + -r "http://update.knime.com/analytics-platform/${KNIME_VERSION},${{ inputs.updateURL }}" \ -p2.arch x86_64 \ -profileProperties org.eclipse.update.install.features=true \ -i "de.openms.feature.feature.group,com.genericworkflownodes.knime.feature.feature.group,de.openms.thirdparty.feature.feature.group" \ diff --git a/.github/workflows/openms_ci_matrix_full.yml b/.github/workflows/openms_ci_matrix_full.yml index f87203d46c2..39a146c4ba6 100644 --- a/.github/workflows/openms_ci_matrix_full.yml +++ b/.github/workflows/openms_ci_matrix_full.yml @@ -174,8 +174,8 @@ jobs: fi echo "version_number=$VERSION_NUMBER" >> $GITHUB_OUTPUT grep -ne "----[[:space:]]*OpenMS" ${{ github.workspace }}/OpenMS/CHANGELOG > index_changelog.txt - START=$(cat index_changelog.txt | grep -A 1 -e " $VERSION_NUMBER " | cut -f1 -d: | head -1) - END=$(cat index_changelog.txt | grep -A 1 -e " $VERSION_NUMBER " | cut -f1 -d: | tail -1) + START=$(cat index_changelog.txt | grep -A 1 -E " $VERSION_NUMBER(\.0)? 
" | cut -f1 -d: | head -1) + END=$(cat index_changelog.txt | grep -A 1 -E " $VERSION_NUMBER(\.0)? " | cut -f1 -d: | tail -1) echo "Extracting between lines:" echo $START echo $END @@ -266,6 +266,8 @@ jobs: fi if [[ "${{ matrix.os }}" == macos-* ]]; then + ## Update the package lists for Brew + brew update ## Needed for Qt. Install before to overwrite the default softlinks on the GH runners brew install python3 --force --overwrite brew install --quiet ccache autoconf automake libtool ninja && brew link --overwrite ccache @@ -537,7 +539,7 @@ jobs: - name: Download source archive as artifact uses: actions/download-artifact@v4 with: - name: OpenMS-${{ steps.create_changelog.outputs.version_number }}.tar.gz + name: OpenMS-${{ needs.build-and-test.outputs.version_number }}.tar.gz - name: Download changelog as artifact if: inputs.do_release @@ -585,7 +587,7 @@ jobs: mkdir -p ~/.ssh/ echo "$PASS" > ~/.ssh/private.key sudo chmod 600 ~/.ssh/private.key - ln -s ./$folder latest #create link to the release folder + ln -s ../$folder latest #create link to the release folder rsync --progress -avz -e "ssh -i ~/.ssh/private.key -o StrictHostKeyChecking=no" latest "$USER@$HOST:/OpenMSInstaller/release" - name: create RELEASE_TEXT @@ -665,7 +667,7 @@ jobs: if [[ "${{ github.ref_name }}" == "nightly" ]]; then folder=nightly elif [[ "${{ github.ref_name }}" == release/* ]]; then - folder=release/${{ github.ref_name }} + folder=${{ github.ref_name }} else folder=experimental/${{ github.ref_name }} fi @@ -696,7 +698,7 @@ jobs: echo "$PASS" > ~/.ssh/private.key sudo chmod 600 ~/.ssh/private.key ln -s ./$folder latest #we can use the same link from above. 
- rsync --progress -avz -e "ssh -i ~/.ssh/private.key -o StrictHostKeyChecking=no" latest "$USER@$HOST:/Documentation/release + rsync --progress -avz -e "ssh -i ~/.ssh/private.key -o StrictHostKeyChecking=no" latest "$USER@$HOST:/Documentation/release" # TODO create softlinks to latest nightly # TODO create and upload file hashes, at least for release candidate @@ -871,7 +873,7 @@ jobs: if [[ "${{ github.ref_name }}" == "nightly" ]]; then folder=nightly elif [[ "${{ github.ref_name }}" == release/* ]]; then - folder=release/${{ github.ref_name }} + folder=${{ github.ref_name }} else folder=experimental/${{ github.ref_name }} fi @@ -907,8 +909,8 @@ jobs: mkdir -p ~/.ssh/ echo "$PASS" > ~/.ssh/private.key sudo chmod 600 ~/.ssh/private.key - ln -s ./$folder latest #create link to the release folder - rsync --progress -avz -e "ssh -i ~/.ssh/private.key -o StrictHostKeyChecking=no" latest "$USER@$HOST:/knime-plugin/updateSite/release + ln -s ../$folder latest #create link to the release folder + rsync --progress -avz -e "ssh -i ~/.ssh/private.key -o StrictHostKeyChecking=no" latest "$USER@$HOST:/knime-plugin/updateSite/release" do-release: if: inputs.do_release diff --git a/.github/workflows/pyopenms-wheels.yml b/.github/workflows/pyopenms-wheels.yml index 244b4ab1de1..b08a8ada36b 100644 --- a/.github/workflows/pyopenms-wheels.yml +++ b/.github/workflows/pyopenms-wheels.yml @@ -181,6 +181,8 @@ jobs: - name: Install contrib packages from brew run: | + ## Update the package lists for Brew + brew update ## Needed for Qt. Install before to overwrite the default softlinks on the GH runners brew install python3 --force --overwrite brew install --quiet ccache autoconf automake libtool ninja && brew link --overwrite ccache @@ -295,6 +297,8 @@ jobs: - name: Install contrib packages from brew run: | + ## Update the package lists for Brew + brew update ## Needed for Qt. 
Install before to overwrite the default softlinks on the GH runners brew install python3 --force --overwrite brew install --quiet ccache autoconf automake libtool ninja && brew link --overwrite ccache diff --git a/.github/workflows/update_version_numbers.yml b/.github/workflows/update_version_numbers.yml index 75361952e52..5d4970d087c 100644 --- a/.github/workflows/update_version_numbers.yml +++ b/.github/workflows/update_version_numbers.yml @@ -16,7 +16,7 @@ jobs: runs-on: macos-latest steps: # Getting files (OpenMS) - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 # Update files with new package version numbers - name: update files @@ -40,6 +40,9 @@ jobs: # update test write ini out: sed -i '' "s# diff --git a/AUTHORS b/AUTHORS index 6db5bc54822..16fe55b1f56 100644 --- a/AUTHORS +++ b/AUTHORS @@ -53,6 +53,7 @@ the authors tag in the respective file header. - Johan Teleman - Johannes Junker - Johannes Veit + - Johannes von Kleist - Joshua Charkow - Julia Thueringer - Juliane Schmachtenberg diff --git a/CHANGELOG b/CHANGELOG index 39f27f33112..59cb8add06e 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -13,13 +13,26 @@ PR - Pull Request (on GitHub), i.e. integration of a new feature or bugfix #, e.g. #4957 - a reference to an issue or pull request on GitHub, visit e.g. 
https://github.com/OpenMS/OpenMS/pull/XXXX (replace XXXX with number of interest) for details ------------------------------------------------------------------------------------------ ----- OpenMS 3.2.0 (under development) ---- +---- OpenMS 3.3.0 (under development) ---- ------------------------------------------------------------------------------------------ +- FileInfo: + - support MzTab files (#7568) + + +------------------------------------------------------------------------------------------ +---- OpenMS 3.2.0 ---- +------------------------------------------------------------------------------------------ + + + What's new: - Changes breaking backwards compatibility: - Rename of parameters for TOPP tool FeatureFinderCentroided (debug -> advanced), and PeakPickerWavelet/TOFCalibration (optimization -> optimization:type) (#7154) - Rename of parameters for TOPP tool IDFilter (score:pep -> score:psm; score:prot -> score:protein; score:protgroup -> score:proteingroup) with 'nan' as new default (#7541) + - 3.2.0 KNIME package requires KNIME 5.3 or later +- Support for SubsetNeighborSearch (SNS) via DecoyDatabase (#7565) +- SageAdapter received large updates including added functionality for PTM discovery + enabling features such as chimera search, RT prediction, filtering by q-value, etc. Library: - Extend FileHandler to support load and store operations for our major datastructures (spectra, features, identifications, etc.). Replaced file type specific code with the more generic FileHandler calls to decouple the IO code from other parts of the library. 
@@ -36,6 +49,7 @@ New Tools: - AssayGeneratorMetaboSirius -- Assay library generation from a SIRIUS project directory (Metabolomics) - SiriusExport -- Metabolite identification using single and tandem mass spectrometry + Fixes: - FileConverter: more robust (#7176) - MSFragger: allow relative path to database (#7155) @@ -50,11 +64,11 @@ Fixes: - TOPPAS: open files in TOPPView (#7213) - pyOpenMS: Log warnings in pure Python code with warnings.warn instead of print (#7418) - more robust parsing of mzIdentML (#7153) +- SageAdapter now works with sage v0.15.0 and beyond +- OpenSwath: Fix bug in diaPASEF window determination (#7546) Misc: -- FileInfo: - - Report IM ranges (if any) (#7459) - - support Mztab files (#7568) +- FileInfo: Report ion mobility ranges (if any) (#7459) - OpenMSInfo reports the ILP solver (CoinOr or glpk) (#7156) - add citation information for OpenMS 3.0 (Nat. Methods) (#7383) - Add export for Common Workflow Language (CWL) (#6156) diff --git a/CMakeLists.txt b/CMakeLists.txt index f5d8f303198..6b2aa37cf15 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -105,7 +105,7 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/Windows") #------------------------------------------------------------------------------ set(OPENMS_PACKAGE_VERSION_MAJOR "3") -set(OPENMS_PACKAGE_VERSION_MINOR "2") +set(OPENMS_PACKAGE_VERSION_MINOR "3") set(OPENMS_PACKAGE_VERSION_PATCH "0") set(OPENMS_PACKAGE_VERSION "${OPENMS_PACKAGE_VERSION_MAJOR}.${OPENMS_PACKAGE_VERSION_MINOR}.${OPENMS_PACKAGE_VERSION_PATCH}") diff --git a/THIRDPARTY b/THIRDPARTY index d6594eb775e..6c4aff683f5 160000 --- a/THIRDPARTY +++ b/THIRDPARTY @@ -1 +1 @@ -Subproject commit d6594eb775ebca0255f0129d946d7e582b37ac37 +Subproject commit 6c4aff683f5d6cf209a240a4e29fae7be16ce1ad diff --git a/doc/doxygen/public/TOPPAS.doxygen b/doc/doxygen/public/TOPPAS.doxygen index 673596f41aa..bd3706f08b7 100644 --- a/doc/doxygen/public/TOPPAS.doxygen +++ b/doc/doxygen/public/TOPPAS.doxygen @@ -46,12 +46,18 @@ 
@image html TOPPAS_simple_example.png @image latex TOPPAS_simple_example.png "" width=14cm - - To create a new TOPPAS file, you can either: + You can play with existing @em example @em pipelines, which cover identification, + quantification and some special use cases, such as SubsetNeighborSearch. + To open an existing example pipeline, select @p File > @p Open @p example @p file from the menu bar in TOPPAS. + See @ref TOPPAS_examples below for details. + + To create a new (empty) TOPPAS file, you can either: - open TOPPAS without providing any existing workflow - an empty workflow will be opened automatically - in a running TOPPAS program choose: @p File @p > @p New - - create an empty file in your file browser (explorer) with the suffix @p \.toppas and double-click it (on Windows systems all @p \.toppas files are associated with TOPPAS automatically during installation of %OpenMS, on Linux and MacOS you might need to manually associate the extension) + - create an empty file with the suffix @p \.toppas + + All @p \.toppas files are usually associated with TOPPAS automatically during installation of %OpenMS - at least on Windows. On Linux and MacOS you might need to manually associate the \.toppas extension. @page TOPPAS_interface User interface @@ -246,17 +252,21 @@ @page TOPPAS_examples Examples - The following sections explain the example pipelines TOPPAS comes with. You can - open them by selecting @p File > @p Open @p example @p file. All input files and - parameters are already specified, so you can just hit @p Pipeline > @p Run (or press + The following sections explain the example pipelines TOPPAS comes with. + + You can @em open all example pipelines by selecting @p File > @p Open @p example @p file in TOPPAS. + + All input files and parameters are already specified, so you can just hit @p Pipeline > @p Run (or press @p F5) and see what happens. 
@section TOPPAS_peak_picking_example Profile data processing - The file @p peakpicker_tutorial.toppas contains a simple pipeline representing a + The file @p peakpicker_tutorial.toppas can be inspected in TOPPAS via `File -> Open Example File`. + It contains a simple pipeline representing a common use case: starting with profile data, the noise is eliminated and the baseline is subtracted. Then, PeakPickerHiRes is used to find all peaks in the noise-filtered and baseline-reduced profile data. + @image html TOPPAS_example_profile_data_processing.png @image latex TOPPAS_example_profile_data_processing.png "" width=14cm @@ -264,8 +274,8 @@ @section TOPPAS_id_example Identification of E. coli peptides This section describes an example identification pipeline contained in the - example directory, @p Ecoli_Identification.toppas. It is shipped together - with a reduced example mzML file containing 139 MS2 spectra from an E. coli + example directory, @p Ecoli_Identification.toppas. Inspect it in TOPPAS via `File -> Open Example File`. + It is shipped together with a reduced example mzML file containing 139 MS2 spectra from an E. coli run on an Orbitrap instrument as well as an E. coli target-decoy database. We use the search engine @@ -293,7 +303,8 @@ @section TOPPAS_quant_example Quantitation of BSA runs The simple pipeline described in this section (@p BSA_Quantitation.toppas) can be used to quantify peptides - that occur on different runs. The example dataset contains three different bovine serum albumin (BSA) runs. + that occur on different runs. Inspect it in TOPPAS via `File -> Open Example File`. + The example dataset contains three different bovine serum albumin (BSA) runs. First, FeatureFinderCentroided is called since the dataset is centroided. The results of the feature finding are then annotated with (existing) identification results. For convenience, we provide these search results (as idXML files) with an FDR of 5% in the BSA directory. 
@@ -318,22 +329,35 @@ @image html TOPPAS_BSA_results_3d.png @image latex TOPPAS_BSA_results_3d.png "" width=10cm + + @section TOPPAS_subsetneighborsearch_example Subset Neighbor Search + + We will use a special FDR search strategy described by Lin et al. + It uses a mode of @ref TOPP_DecoyDatabase which allows creating a special "neighbor" database to control the FDR. + + The example pipeline is named @p FDR_NeighborSearch.toppas. Inspect it in TOPPAS via `File -> Open Example File`. A description is provided within the TOPPAS workflow. + + @section TOPPAS_merger_example Merger and Collect nodes The following example is actually not a useful workflow but is supposed to demonstrate how merger and collector nodes can be used in a pipeline. Have a look at - @p merger_tutorial.toppas: + @p merger_tutorial.toppas. Inspect it in TOPPAS via `File -> Open Example File`. @image html TOPPAS_example_merger.png @image latex TOPPAS_example_merger.png "" width=14cm - As its name suggests, a merger merges its incoming file lists, i.e., - files of all incoming edges are appended into new lists (which - have as many elements as the merger has incoming connections). All tools this merger has outgoing - connections to are called with these merged lists as input files. All incoming connections should - pass the same number of files (unless the corresponding preceding tool is in recycling mode). + In short: mergers require multiple input edges, whose data is combined bit by bit. + Collectors on the other hand usually only have one input edge and combine all files from this single edge into a list in one go. The succeeding tool will be invoked only once, with a list of input files. + + In detail, a @em merger merges its incoming file lists, i.e., + files of all incoming edges are combined into new lists. Each file list has as many elements as the merger has incoming connections. + And there are as many lists as there are files (rounds) from the preceding (=upstream) tool. 
The tool downstream of the merger is invoked with + these merged lists as input files. + All incoming connections should pass the same number of files (unless one of the upstream nodes is in "recycling mode"), such that all merged lists have the same number of files. + In other words, if you have K input edges with N files each, the merger will create N output lists, with K elements each. - A collector node, on the other hand, waits for all rounds to finish before concatenating all files from all + A collector node, on the other hand, waits for all rounds to finish on its upstream side before concatenating all files from all incoming connections into one single list. It then calls the next tool with this list of files as input. This will happen exactly once during the entire pipeline run. diff --git a/share/OpenMS/CHEMISTRY/Enzymes.xml b/share/OpenMS/CHEMISTRY/Enzymes.xml index 99a37f77390..cbb50a8e6c7 100755 --- a/share/OpenMS/CHEMISTRY/Enzymes.xml +++ b/share/OpenMS/CHEMISTRY/Enzymes.xml @@ -150,6 +150,7 @@ + @@ -210,6 +211,7 @@ + diff --git a/share/OpenMS/examples/TOPPAS/FDR_NeighborSearch.toppas b/share/OpenMS/examples/TOPPAS/FDR_NeighborSearch.toppas new file mode 100644 index 00000000000..912ba56529c --- /dev/null +++ b/share/OpenMS/examples/TOPPAS/FDR_NeighborSearch.toppas @@ -0,0 +1,425 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h b/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h new file mode 100644 index 00000000000..16747c697b2 --- /dev/null +++ b/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h @@ -0,0 +1,161 @@ +// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Chris Bielow, Philipp Wang $ +// $Authors: Chris Bielow, Philipp Wang $ +// -------------------------------------------------------------------------- + +#pragma once + +#include +#include + +#include +#include + + + +namespace OpenMS +{ + /** + @brief The Neighbor Peptide functionality is designed to find peptides (neighbors) in a given set of sequences (FASTA file) that are + similar to a target peptide (aka relevant peptide) based on mass and spectral characteristics. This provides more power + when searching complex samples, when only a subset of the peptides/proteins is of interest. + + The paper on subset neighbor search is www.ncbi.nlm.nih.gov/pmc/articles/PMC8489664/ + DOI: 10.1021/acs.jproteome.1c00483 + */ + class OPENMS_DLLAPI NeighborSeq + { + + public: + /// Constructor + /// @param digested_relevant_peptides A vector of digested relevant peptides + NeighborSeq(std::vector&& digested_relevant_peptides); + + /** + * @brief Generates a theoretical spectrum for a given peptide sequence with b/y ions at charge 1. + * + * Includes all b and y ions with charge 1 (even the prefix ions, e.g. b1), but no internal ions. + * + * @param peptide_sequence The peptide sequence for which to generate the spectrum. + * @return The generated theoretical spectrum. 
+ */ + MSSpectrum generateSpectrum(const AASequence& peptide_sequence); + + /** + * @brief Compares two spectra to determine if they share a sufficient number of ions. + * + * All peaks are considered. Use generateSpectrum() to generate theoretical spectra with b/y ions. + * + * @param spec1 The first theoretical spectrum. + * @param spec2 The second theoretical spectrum. + * @param min_shared_ion_fraction The minimal required proportion of shared ions in [0, 1] + * @param mz_bin_size Bin size for the m/z values, which determines if two peaks are considered to be the same (typically, 0.05 for high resolution and 1.0005079 for low resolution). + * @return True if the spectra share a sufficient number of ions, false otherwise. + */ + static bool isNeighborSpectrum(const MSSpectrum& spec1, const MSSpectrum& spec2, const double min_shared_ion_fraction, const double mz_bin_size); + /** + * @brief Compute the number of shared ions between two spectra + * + * All peaks are considered. Use generateSpectrum() to generate theoretical spectra with b/y ions. + * + * @param spec1 The first theoretical spectrum. + * @param spec2 The second theoretical spectrum. + * @param mz_bin_size Bin size for the m/z values, which determines if two peaks are considered to be the same. + * @return The number of shared ions + */ + static int computeSharedIonCount(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& mz_bin_size); + + /** + * @brief Is this peptide a neighbor to one of the relevant peptides? + * + * Also updates the internal statistics, which can be retrieved using getNeighborStats(). + * + * @param neighbor_candidate The peptide sequence (from a neighbor protein) to compare against the internal relevant peptides (see constructor). + * @param mass_tolerance_pc Maximal precursor mass difference (in Da or ppm; see 'mass_tolerance_pc_ppm') between neighbor and relevant peptide. + * @param mass_tolerance_pc_ppm Is 'mass_tolerance_pc' in Da or ppm? 
+ * @param min_shared_ion_fraction The ion tolerance for neighbor peptides. + * @param mz_bin_size Bin size for spectra m/z comparison (the original study suggests 0.05 Th for high-res and 1.0005079 Th for low-res spectra). + * @return true if @p neighbor_candidate is neighbor to one or more relevant peptides, false otherwise. + */ + bool isNeighborPeptide(const AASequence& neighbor_candidate, + const double mass_tolerance_pc, + const bool mass_tolerance_pc_ppm, + const double min_shared_ion_fraction, + const double mz_bin_size); + + /// Statistics of how many neighbors were found per reference peptide + struct NeighborStats + { + /** @name NeigborStats_members + * Mutually exclusive categories of how many neighbors were found per reference peptide + */ + ///@{ + int unfindable_peptides = 0; ///< how many ref-peptides contain an 'X' (unknown amino acid) and thus cannot be searched for neighbors + int findable_no_neighbors = 0; ///< how many peptides had no neighbors? + int findable_one_neighbor = 0; ///< how many peptides had exactly one neighbor? + int findable_multiple_neighbors = 0; ///< how many peptides had multiple neighbors? 
+ ///@} + + /// Sum of all 4 categories + int total() const + { + return unfindable_peptides + findable_no_neighbors + findable_one_neighbor + findable_multiple_neighbors; + } + /// Number of reference peptides that contain an 'X' (unknown amino acid), formatted as 'X (Y%)'; Y is 0 if total() is 0 (avoids division by zero) + String unfindable() const + { + return String(unfindable_peptides) + " (" + (total() > 0 ? unfindable_peptides * 100 / total() : 0) + "%)"; + } + + /// Number of reference peptides that had no neighbors, formatted as 'X (Y%)'; Y is 0 if total() is 0 (avoids division by zero) + String noNB() const + { + return String(findable_no_neighbors) + " (" + (total() > 0 ? findable_no_neighbors * 100 / total() : 0) + "%)"; + } + /// Number of reference peptides that had exactly one neighbor, formatted as 'X (Y%)'; Y is 0 if total() is 0 (avoids division by zero) + String oneNB() const + { + return String(findable_one_neighbor) + " (" + (total() > 0 ? findable_one_neighbor * 100 / total() : 0) + "%)"; + } + /// Number of reference peptides that had multiple neighbors, formatted as 'X (Y%)'; Y is 0 if total() is 0 (avoids division by zero) + String multiNB() const + { + return String(findable_multiple_neighbors) + " (" + (total() > 0 ? findable_multiple_neighbors * 100 / total() : 0) + "%)"; + } + }; + + /// after calling isNeighborPeptide() multiple times, this function returns the statistics of how many neighbors were found per reference peptide + NeighborStats getNeighborStats() const; + + protected: + /** + * @brief Creates a map of masses to positions from the internal relevant peptides. + * @return A map where the key is the mass and the value is a vector of positions. + */ + std::map> createMassLookup_(); + + /** + * @brief Finds candidate positions based on a given mono-isotopic weight and mass tolerance. + * @param mono_weight The mono-isotopic weight to find candidates for. + * @param mass_tolerance The allowed tolerance for matching the mass. + * @param mass_tolerance_pc_ppm Whether the mass tolerance is in ppm. 
+ * @return A pair of begin/end iterators into mass_position_map_ for the candidate positions + */ + auto findCandidatePositions_(const double mono_weight, double mass_tolerance, const bool mass_tolerance_pc_ppm); + + + private: + const std::vector& digested_relevant_peptides_; ///< digested relevant peptides + std::map> mass_position_map_; ///< map of masses to positions in digested_relevant_peptides_ + + TheoreticalSpectrumGenerator spec_gen_; ///< for b/y ions with charge 1 + const Residue* x_residue_; ///< residue for unknown amino acid + + std::vector neighbor_stats_; ///< how many neighbors per reference peptide searched using isNeighborPeptide()? + + }; // class NeighborSeq + +} // namespace OpenMS diff --git a/src/openms/include/OpenMS/ANALYSIS/ID/sources.cmake b/src/openms/include/OpenMS/ANALYSIS/ID/sources.cmake index 9484ece68a7..823a3cfb83d 100644 --- a/src/openms/include/OpenMS/ANALYSIS/ID/sources.cmake +++ b/src/openms/include/OpenMS/ANALYSIS/ID/sources.cmake @@ -34,6 +34,7 @@ IonIdentityMolecularNetworking.h MessagePasserFactory.h MetaboliteSpectralMatching.h MorpheusScore.h +NeighborSeq.h PeptideIndexing.h PeptideProteinResolution.h PercolatorFeatureSetHelper.h diff --git a/src/openms/include/OpenMS/APPLICATIONS/TOPPBase.h b/src/openms/include/OpenMS/APPLICATIONS/TOPPBase.h index 2b5b0f6e20b..8dea6710586 100644 --- a/src/openms/include/OpenMS/APPLICATIONS/TOPPBase.h +++ b/src/openms/include/OpenMS/APPLICATIONS/TOPPBase.h @@ -188,6 +188,9 @@ namespace OpenMS /// Returns a link to the documentation of the tool (accessible on our servers and only after inclusion in the nightly branch or a release). String getDocumentationURL() const; + /// The latest and greatest OpenMS citation + static const Citation cite_openms; + private: /// Tool name. This is assigned once and for all in the constructor. 
String const tool_name_; @@ -960,9 +963,6 @@ namespace OpenMS /// .TOPP.ini file for storing system default parameters static String topp_ini_file_; - /// The OpenMS citation - static const Citation cite_openms_; - /// Debug level set by -debug Int debug_level_; private: diff --git a/src/openms/include/OpenMS/FORMAT/FASTAFile.h b/src/openms/include/OpenMS/FORMAT/FASTAFile.h index 0629cc91860..b20b6927d14 100644 --- a/src/openms/include/OpenMS/FORMAT/FASTAFile.h +++ b/src/openms/include/OpenMS/FORMAT/FASTAFile.h @@ -102,6 +102,9 @@ namespace OpenMS */ void readStart(const String& filename); + /// same as readStart(), but does internal progress logging whenever readNextWithProgress() is called + void readStartWithProgress(const String& filename, const String& progress_label); + /** @brief Reads the next FASTA entry from file. If you want to read all entries in one go, use load(). @@ -111,7 +114,11 @@ namespace OpenMS */ bool readNext(FASTAEntry& protein); - /// current stream position + /// same as readNext(), but does internal progress logging; use readStartWithProgress() to enable this + /// Calls progressEnd() when EOF is reached (i.e. when returning false) + bool readNextWithProgress(FASTAEntry& protein); + + /// current stream position when reading a file std::streampos position(); /// is stream at EOF? diff --git a/src/openms/include/OpenMS/FORMAT/PercolatorInfile.h b/src/openms/include/OpenMS/FORMAT/PercolatorInfile.h index 2e5da34cba2..ccca14828eb 100644 --- a/src/openms/include/OpenMS/FORMAT/PercolatorInfile.h +++ b/src/openms/include/OpenMS/FORMAT/PercolatorInfile.h @@ -30,19 +30,42 @@ namespace OpenMS int min_charge, int max_charge); - /** @brief load pin file and convert to a vector of PeptideIdentification using the given score column @p score_name and orientation @p higher_score_better. - If a decoy prefix is provided, the decoy status is set from the protein accessions. 
- Otherwise, it assumes that the pin file already contains the correctly annotated decoy status. - If @p extra_scores is not empty, the scores are added to the PeptideHit as MetaValues. - If a filename column is encountered the set of @p filenames is filled in the order of appearance and PeptideIdentifications annotated with the id_merge_index meta value to link them to the filename (similar to a merged idXML file). - TODO: implement something similar to PepXMLFile().setPreferredFixedModifications(getModifications_(fixed_modifications_names)); - **/ + + /** + * @brief Loads peptide identifications from a Percolator input file. + * + * This function reads a Percolator input file (`pin_file`) and returns a vector of `PeptideIdentification` objects. + * It extracts relevant information such as peptide sequences, scores, charges, annotations, and protein accessions, applying + * specified thresholds and handling decoy targets as needed. + * Note: If a filename column is encountered the set of @p filenames is filled in the order of appearance and PeptideIdentifications annotated with the id_merge_index meta value to link them to the filename (similar to a merged idXML file). + * + * @param pin_file The path to the Percolator input file with a `.pin` extension. + * + * @param higher_score_better A boolean flag indicating whether higher scores are considered better (`true`) or lower scores are better (`false`). + * + * @param score_name The name of the primary score to be used for ranking peptide hits. + * + * @param extra_scores A list of additional score names that should be extracted and stored in each `PeptideHit`. + * + * @param filenames Will be populated with the unique raw file names extracted from the input data. + * + * @param decoy_prefix The prefix used to identify decoy protein accessions. Proteins with accessions starting with this prefix are marked as decoys. Otherwise, it assumes that the pin file already contains the correctly annotated decoy status. 
+ * @param threshold A double value representing the threshold for the `spectrum_q` value. Only spectra with `spectrum_q` below this threshold are processed. + Implemented to allow prefiltering of Sage results. + * @param SageAnnotation A boolean value used to determine if the pin file is coming from Sage or not + * @return A `std::vector` of `PeptideIdentification` objects containing the peptide identifications. + + * @throws `Exception::ParseError` if any line in the input file does not have the expected number of columns. + * TODO: implement something similar to PepXMLFile().setPreferredFixedModifications(getModifications_(fixed_modifications_names)); + */ static std::vector load(const String& pin_file, bool higher_score_better, const String& score_name, const StringList& extra_scores, StringList& filenames, - String decoy_prefix = ""); + String decoy_prefix = "", + double threshold = 0.01, + bool SageAnnotation = false); // uses spectrum_reference, if empty uses spectrum_id, if also empty fall back to using index static String getScanIdentifier(const PeptideIdentification& pid, size_t index); diff --git a/src/openms/include/OpenMS/FORMAT/SwathFile.h b/src/openms/include/OpenMS/FORMAT/SwathFile.h index 9244f00501b..d3b0eef045a 100644 --- a/src/openms/include/OpenMS/FORMAT/SwathFile.h +++ b/src/openms/include/OpenMS/FORMAT/SwathFile.h @@ -92,7 +92,8 @@ namespace OpenMS /// Counts the number of scans in a full Swath file (e.g. 
concatenated non-split file) void countScansInSwath_(const std::vector& exp, std::vector& swath_counter, int& nr_ms1_spectra, - std::vector& known_window_boundaries); + std::vector& known_window_boundaries, + double TOLERANCE=1e-6); }; } diff --git a/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp b/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp new file mode 100644 index 00000000000..d1b480e94d5 --- /dev/null +++ b/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp @@ -0,0 +1,177 @@ +// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Chris Bielow, Philipp Wang $ +// $Authors: Chris Bielow, Philipp Wang $ +// -------------------------------------------------------------------------- +#include +#include +#include +#include + +#include + +using namespace OpenMS; +using namespace std; + + +NeighborSeq::NeighborSeq(std::vector&& digested_relevant_peptides) + : digested_relevant_peptides_(std::move(digested_relevant_peptides)), + neighbor_stats_(digested_relevant_peptides_.size(), 0) +{ + Param params; + params.setValue("add_b_ions", "true"); + params.setValue("add_y_ions", "true"); + params.setValue("add_first_prefix_ion", "true"); // do not skip b1 ion + spec_gen_.setParameters(params); + + x_residue_ = ResidueDB::getInstance()->getResidue('X'); + + // Index peptide masses for fast lookup + mass_position_map_ = createMassLookup_(); +} + +// Function to generate the theoretical spectrum for a given peptide sequence +MSSpectrum NeighborSeq::generateSpectrum(const AASequence& peptide_sequence) +{ + MSSpectrum spectrum; + spec_gen_.getSpectrum(spectrum, peptide_sequence, 1, 1); + return spectrum; +} + +int NeighborSeq::computeSharedIonCount(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& mz_bin_size) +{ + // compute shared b/y ions in two sorted ranges + auto setIntersectionCount = 
[mz_bin_size](auto first1, auto last1, auto first2, auto last2) -> Size + { + Size count {0}; + while (first1 != last1 && first2 != last2) + { + auto val1 = int(first1->getMZ() / mz_bin_size); + auto val2 = int(first2->getMZ() / mz_bin_size); + if (val1 < val2) ++first1; + else + { + if (val1 == val2) + { + ++first1; + ++count; + } + ++first2; + } + } + return count; + }; + + auto shared_ions = setIntersectionCount(spec1.begin(), spec1.end(), spec2.begin(), spec2.end()); + + return shared_ions; +} + +// Function to compare two spectra and determine if they are similar +bool NeighborSeq::isNeighborSpectrum(const MSSpectrum& spec1, const MSSpectrum& spec2, const double min_shared_ion_fraction, const double mz_bin_size) +{ + // Calculate the number of shared bins considering the bin frequencies + int B12 = computeSharedIonCount(spec1, spec2, mz_bin_size); + + // Calculate the fraction of shared bins + double fraction_shared = (2.0 * B12) / (spec1.size() + spec2.size()); + + return fraction_shared > min_shared_ion_fraction; +} + +//Finds candidate positions based on a given mono-isotopic weight and mass tolerance. 
+auto NeighborSeq::findCandidatePositions_(const double mono_weight, double mass_tolerance, const bool mass_tolerance_pc_ppm) +{ + // Calculate the lower and upper bounds for the mass tolerance range + assert(mass_tolerance >= 0); + if (mass_tolerance_pc_ppm) + { + mass_tolerance = Math::ppmToMass(mono_weight, mass_tolerance); + } + + // Find the lower bound iterator in the map + auto lower = mass_position_map_.lower_bound(mono_weight - mass_tolerance); + + // Find the upper bound iterator in the map + auto upper = mass_position_map_.upper_bound(mono_weight + mass_tolerance); + + return make_pair(lower, upper); +} + +// Method to find neighbor peptides in a given FASTA file +bool NeighborSeq::isNeighborPeptide(const AASequence& peptide, + const double mass_tolerance_pc, + const bool mass_tolerance_pc_ppm, + const double min_shared_ion_fraction, + const double mz_bin_size) + +{ + auto [from, to] = findCandidatePositions_(peptide.getMonoWeight(), mass_tolerance_pc, mass_tolerance_pc_ppm); + if (from == to) return false; + + bool found = false; + MSSpectrum spec = generateSpectrum(peptide); + for (auto it_rel_pep = from; it_rel_pep != to; ++it_rel_pep) + { + for (int pep_index : it_rel_pep->second) + { + MSSpectrum neighbor_spec = generateSpectrum(digested_relevant_peptides_[pep_index]); + if (isNeighborSpectrum(spec, neighbor_spec, min_shared_ion_fraction, mz_bin_size)) + { + //std::cout << digested_relevant_peptides_[pep_index] << " has neighbor " << peptide << '\n'; + neighbor_stats_[pep_index]++; + found = true; + } + } + } + return found; +} + +map> NeighborSeq::createMassLookup_() +{ + // Map to store the mass and corresponding positions + map> mass_position_map; + + int skipped{0}; + // Iterate through the vector of AASequence objects + for (size_t i = 0; i < digested_relevant_peptides_.size(); ++i) + { + if (digested_relevant_peptides_[i].has(*x_residue_)) + { + neighbor_stats_[i] = -1; // mark as not findable + skipped++; + continue; + } + // Calculate the 
mono-isotopic mass of the sequence + double mass = digested_relevant_peptides_[i].getMonoWeight(); + + // Insert the mass and the position into the map + mass_position_map[mass].push_back(i); + } + OPENMS_LOG_WARN << "Skipped " << skipped << "/" << digested_relevant_peptides_.size() + << " peptides with unknown('X') amino acids." << endl; + return mass_position_map; +} + +NeighborSeq::NeighborStats NeighborSeq::getNeighborStats() const +{ + NeighborStats stats; + for (int count : neighbor_stats_) + { + if (count == -1) + stats.unfindable_peptides++; + else if (count == 0) + stats.findable_no_neighbors++; + else if (count == 1) + stats.findable_one_neighbor++; + else + stats.findable_multiple_neighbors++; + } + return stats; +} + + + + diff --git a/src/openms/source/ANALYSIS/ID/sources.cmake b/src/openms/source/ANALYSIS/ID/sources.cmake index 41cc05a6d59..de1522abfb9 100644 --- a/src/openms/source/ANALYSIS/ID/sources.cmake +++ b/src/openms/source/ANALYSIS/ID/sources.cmake @@ -34,6 +34,7 @@ IonIdentityMolecularNetworking.cpp MessagePasserFactory.cpp MetaboliteSpectralMatching.cpp MorpheusScore.cpp +NeighborSeq.cpp PeptideProteinResolution.cpp PeptideIndexing.cpp PercolatorFeatureSetHelper.cpp diff --git a/src/openms/source/APPLICATIONS/TOPPBase.cpp b/src/openms/source/APPLICATIONS/TOPPBase.cpp index 13728ac837b..a739f9b794c 100755 --- a/src/openms/source/APPLICATIONS/TOPPBase.cpp +++ b/src/openms/source/APPLICATIONS/TOPPBase.cpp @@ -69,7 +69,7 @@ namespace OpenMS using namespace Exception; String TOPPBase::topp_ini_file_ = String(QDir::homePath()) + "/.TOPP.ini"; - const Citation TOPPBase::cite_openms_ + const Citation TOPPBase::cite_openms = {"Pfeuffer, J., Bielow, C., Wein, S. 
et al.", "OpenMS 3 enables reproducible analysis of large-scale mass spectrometry data", "Nat Methods (2024)", "10.1038/s41592-024-02197-7"}; @@ -571,7 +571,7 @@ namespace OpenMS << bright("Full documentation: ") << underline(docurl) // the space is needed, otherwise the remaining line will be underlined on Windows.. << "\n" << bright("Version: ") << verboseVersion_ << "\n" - << bright("To cite OpenMS:\n") << " + " << is.indent(3) << cite_openms_.toString() + << bright("To cite OpenMS:\n") << " + " << is.indent(3) << cite_openms.toString() << is.indent(0) << "\n"; if (!citations_.empty()) { @@ -2402,7 +2402,7 @@ namespace OpenMS // collect citation information std::vector citation_dois; citation_dois.reserve(citations_.size() + 1); - citation_dois.push_back(cite_openms_.doi); + citation_dois.push_back(cite_openms.doi); for (auto& citation : citations_) { citation_dois.push_back(citation.doi); diff --git a/src/openms/source/FORMAT/FASTAFile.cpp b/src/openms/source/FORMAT/FASTAFile.cpp index b771bdccf7f..52d8d1cb18a 100644 --- a/src/openms/source/FORMAT/FASTAFile.cpp +++ b/src/openms/source/FORMAT/FASTAFile.cpp @@ -146,6 +146,12 @@ namespace OpenMS entries_read_ = 0; } + void FASTAFile::readStartWithProgress(const String& filename, const String& progress_label) + { + readStart(filename); + startProgress(0, fileSize_, progress_label); + } + bool FASTAFile::readNext(FASTAEntry &protein) { if (infile_.eof()) @@ -176,9 +182,25 @@ namespace OpenMS protein.description = std::move(description_); protein.sequence = std::move(seq_); + setProgress(infile_.tellg()); + return true; } + bool FASTAFile::readNextWithProgress(FASTAEntry& protein) + { + if (readNext(protein)) + { + setProgress(position()); + return true; + } + else + { + endProgress(); + return false; + } + } + std::streampos FASTAFile::position() { return infile_.tellg(); @@ -233,7 +255,7 @@ namespace OpenMS void FASTAFile::writeNext(const FASTAEntry &protein) { - outfile_ << ">" << protein.identifier << " " << 
protein.description << "\n"; + outfile_ << '>' << protein.identifier << ' ' << protein.description << "\n"; const String &tmp(protein.sequence); int chunks(tmp.size() / 80); // number of complete chunks diff --git a/src/openms/source/FORMAT/PercolatorInfile.cpp b/src/openms/source/FORMAT/PercolatorInfile.cpp index bf80f8b4fa1..76ba01299ed 100644 --- a/src/openms/source/FORMAT/PercolatorInfile.cpp +++ b/src/openms/source/FORMAT/PercolatorInfile.cpp @@ -3,7 +3,7 @@ // // -------------------------------------------------------------------------- // $Maintainer: Timo Sachsenberg $ -// $Authors: Timo Sachsenberg $ +// $Authors: Timo Sachsenberg, Johannes von Kleist $ // -------------------------------------------------------------------------- #include @@ -62,29 +62,105 @@ namespace OpenMS const String& score_name, const StringList& extra_scores, StringList& filenames, - String decoy_prefix) + String decoy_prefix, + double threshold, + bool SageAnnotation) { CsvFile csv(pin_file, '\t'); - StringList header; - csv.getRow(0, header); + + //Sage Variables, initialized in the following block if SageAnnotation is set + map> anno_mapping; + CsvFile tsv; + CsvFile annos; + unordered_map to_idx_t; + + if (SageAnnotation) // Block for special treatment of sage + { + String tsv_file_path = pin_file.substr(0, pin_file.size()-3); + tsv_file_path = tsv_file_path + "tsv"; + tsv = CsvFile(tsv_file_path,'\t'); + + String temp_diff = "results.sage.pin"; + String anno_file_path = pin_file.substr(0, pin_file.size()-temp_diff.length()); + anno_file_path = anno_file_path + "matched_fragments.sage.tsv"; + annos = CsvFile(anno_file_path, '\t'); + //map PSMID to vec of PeakAnnotation + StringList sage_tsv_header; + tsv.getRow(0, sage_tsv_header); + to_idx_t; // map column name to column index, for full .tsv file + { + int idx_t{}; + for (const auto& h : sage_tsv_header) { to_idx_t[h] = idx_t++; } + } + + // process annotation file + StringList sage_annotation_header; + annos.getRow(0, 
sage_annotation_header); + unordered_map to_idx_a; // map column name to column index, for full annotation file + { + int idx_a{}; + for (const auto& h : sage_annotation_header) { to_idx_a[h] = idx_a++; } + } + // map PSMs -> PeakAnnotation vector + auto num_rows = annos.rowCount(); + + for (size_t i = 1; i < num_rows; ++i) + { + StringList row; + annos.getRow(i, row); + + //Check if mapping already has PSM, if it does add + if (anno_mapping.find(row[to_idx_a.at("psm_id")].toInt()) == anno_mapping.end()) + { + //Make a new vector of annotations + PeptideHit::PeakAnnotation peak_temp; + + peak_temp.annotation = row[to_idx_a.at("fragment_type")] + row[to_idx_a.at("fragment_ordinals")]; + peak_temp.charge = row[to_idx_a.at("fragment_charge")].toInt(); + peak_temp.intensity = row[to_idx_a.at("fragment_intensity")].toDouble(); + peak_temp.mz = row[to_idx_a.at("fragment_mz_experimental")].toDouble(); + + vector temp_anno_vec; + temp_anno_vec.push_back(peak_temp); + anno_mapping[ row[to_idx_a.at("psm_id")].toInt() ] = temp_anno_vec; + } + else + { + //Add values to existing vector + PeptideHit::PeakAnnotation peak_temp; + + peak_temp.annotation = row[to_idx_a.at("fragment_type")] + row[to_idx_a.at("fragment_ordinals")]; + peak_temp.charge = row[to_idx_a.at("fragment_charge")].toInt(); + peak_temp.intensity = row[to_idx_a.at("fragment_intensity")].toDouble(); + peak_temp.mz = row[to_idx_a.at("fragment_mz_experimental")].toDouble(); + + anno_mapping[ row[to_idx_a.at("psm_id")].toInt() ].push_back(peak_temp); + } + } + } + + StringList pin_header; + + csv.getRow(0, pin_header); unordered_map to_idx; // map column name to column index { int idx{}; - for (const auto& h : header) { to_idx[h] = idx++; } + for (const auto& h : pin_header) { to_idx[h] = idx++; } } + // determine file name column index in percolator in file int file_name_column_index{-1}; - if (auto it = std::find(header.begin(), header.end(), "FileName"); it != header.end()) + if (auto it = 
std::find(pin_header.begin(), pin_header.end(), "FileName"); it != pin_header.end()) { - file_name_column_index = it - header.begin(); + file_name_column_index = it - pin_header.begin(); } - - // get column indices of extra scores - std::set found_extra_scores; // additional (non-main) scores that should be stored in the PeptideHit, order important for comparable idXML + + // determine extra scores and store column indices + std::set found_extra_scores; // additional (non-main) scores that should be stored in the PeptideHit, order important for comparable idXML for (const String& s : extra_scores) { - if (auto it = std::find(header.begin(), header.end(), s); it != header.end()) + if (auto it = std::find(pin_header.begin(), pin_header.end(), s); it != pin_header.end()) { found_extra_scores.insert(s); } @@ -93,7 +169,7 @@ namespace OpenMS OPENMS_LOG_WARN << "Extra score: " << s << " not found in Percolator input file." << endl; } } - + // charge columns are not standardized, so we check for the format and create hash to lookup column name to charge mapping std::regex charge_one_hot_pattern("^charge\\d+$"); std::regex sage_one_hot_pattern("^z=\\d+$"); @@ -104,7 +180,7 @@ namespace OpenMS // The reason is that sage searches always for the charge annotated in the spectrum raw file. Only if the annotation is missing it will search // the suggested charge range. 
bool found_sage_otherz_charge_column{false}; - for (const String& c : header) + for (const String& c : pin_header) { if (std::regex_match(c, charge_one_hot_pattern)) { @@ -136,11 +212,21 @@ namespace OpenMS StringList row; csv.getRow(i, row); - if (row.size() != header.size()) + StringList t_row; + + if (SageAnnotation) + { + tsv.getRow(i, t_row); + // skip if spectrum_q is above threshold + if (t_row[to_idx_t.at("spectrum_q")].toDouble() > threshold ) continue; + } + + if (row.size() != pin_header.size()) { - throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Error: line " + String(i) + " of file '" + pin_file + "' does not have the same number of columns as the header!", String(i)); + throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Error: line " + String(i) + " of file '" + pin_file + "' does not have the same number of columns as the pin_header!", String(i)); } + if (file_name_column_index >= 0) { raw_file_name = row[file_name_column_index]; @@ -150,7 +236,6 @@ namespace OpenMS map_filename_to_idx[raw_file_name] = filenames.size() - 1; } } - // NOTE: In our pin files that we WRITE, SpecID will be filename + vendor spectrum native ID // However, many search engines (e.g. Sage) choose arbitrary IDs, which is unfortunately allowed // by this loosely defined format. 
@@ -159,14 +244,12 @@ namespace OpenMS if (auto it = to_idx.find("ion_mobility"); it != to_idx.end()) { const String& sIM = row[it->second]; - const double IM = sIM.toDouble(); - pids.back().setMetaValue(Constants::UserParam::IM, IM); + const double IM = sIM.toDouble(); + if (!pids.empty()) pids.back().setMetaValue(Constants::UserParam::IM, IM); } - // In theory, this should be an integer, but Sage currently cannot extract the number from all vendor spectrum IDs, // so it writes the full ID as string String sScanNr = row[to_idx.at("ScanNr")]; - if (sSpecId != spec_id) { pids.resize(pids.size() + 1); @@ -174,13 +257,12 @@ namespace OpenMS pids.back().setScoreType(score_name); pids.back().setMetaValue(Constants::UserParam::ID_MERGE_INDEX, map_filename_to_idx.at(raw_file_name)); pids.back().setRT(row[to_idx.at("retentiontime")].toDouble() * 60.0); // search engines typically write minutes (e.g., sage) - pids.back().setMetaValue("PinSpecId", sSpecId); + pids.back().setMetaValue("PinSpecId", sSpecId); // Since ScanNr is the closest to help in identifying the spectrum in the file later on, // we use it as spectrum_reference. Since it can be integer only or the complete // vendor ID, you will need a lookup in case of number only later!! - pids.back().setSpectrumReference(sScanNr); + pids.back().setSpectrumReference(sScanNr); } - String sPeptide = row[to_idx.at("Peptide")]; const double score = row[to_idx.at(score_name)].toDouble(); String target_decoy = row[to_idx.at("Label")].toInt() == 1 ? 
"target" : "decoy"; @@ -253,12 +335,33 @@ namespace OpenMS AASequence aa_seq = AASequence::fromString(sPeptide); PeptideHit ph(score, rank, charge, std::move(aa_seq)); ph.setMetaValue("target_decoy", target_decoy); + for (const auto& name : found_extra_scores) { ph.setMetaValue(name, row[to_idx.at(name)]); } ph.setRank(rank); + // adding own meta values + if (SageAnnotation) + { + ph.setMetaValue("spectrum_q", t_row[to_idx_t.at("spectrum_q")].toDouble()); //TODO: check if column exists / SAGE specific treatment + } + ph.setMetaValue("DeltaMass", ( row[to_idx.at("ExpMass")].toDouble() - row[to_idx.at("CalcMass")].toDouble()) ); + // add annotations + if (SageAnnotation) + { + if (anno_mapping.find(sSpecId.toInt()) != anno_mapping.end()) + { + // copy annotations from mapping to PeptideHit + vector pep_vec; + for (const PeptideHit::PeakAnnotation& pep : anno_mapping[sSpecId.toInt()]) + { + pep_vec.push_back(pep) ; + } + ph.setPeakAnnotations(pep_vec); + } + } // add link to protein (we only know the accession but not start/end, aa_before/after in protein at this point) for (const String& accession : accessions) { @@ -267,6 +370,7 @@ namespace OpenMS pids.back().insertHit(std::move(ph)); } + return pids; } @@ -542,4 +646,4 @@ namespace OpenMS return count; } -} +} \ No newline at end of file diff --git a/src/openms/source/FORMAT/SwathFile.cpp b/src/openms/source/FORMAT/SwathFile.cpp index 0c5c41e35d5..6ce7016a6d1 100644 --- a/src/openms/source/FORMAT/SwathFile.cpp +++ b/src/openms/source/FORMAT/SwathFile.cpp @@ -309,7 +309,8 @@ namespace OpenMS /// Counts the number of scans in a full Swath file (e.g. 
concatenated non-split file) void SwathFile::countScansInSwath_(const std::vector& exp, std::vector& swath_counter, int& nr_ms1_spectra, - std::vector& known_window_boundaries) + std::vector& known_window_boundaries, + double TOLERANCE) { int ms1_counter = 0; for (Size i = 0; i < exp.size(); i++) @@ -328,28 +329,32 @@ namespace OpenMS "Found SWATH scan (MS level 2 scan) without a precursor. Cannot determine SWATH window."); } const std::vector prec = s.getPrecursors(); - double center = prec[0].getMZ(); - - - // check if ion mobility is present - double lowerIm = -1; - double upperIm = -1; // these initial values assume ion mobility is not present + // set ion mobility if exists, otherwise will take default value of -1 + double imLower, imUpper; if (s.metaValueExists("ion mobility lower limit")) { - lowerIm = s.getMetaValue("ion mobility lower limit"); // want this to be -1 if no ion mobility - upperIm = s.getMetaValue("ion mobility upper limit"); + imLower = s.getMetaValue("ion mobility lower limit"); // want this to be -1 if no ion mobility + imUpper = s.getMetaValue("ion mobility upper limit"); } + else + { + imLower = -1; + imUpper = -1; + } + const OpenSwath::SwathMap boundary(prec[0].getMZ() - prec[0].getIsolationWindowLowerOffset(), + prec[0].getMZ() + prec[0].getIsolationWindowUpperOffset(), + prec[0].getMZ(), + imLower, + imUpper, + false); bool found = false; - for (Size j = 0; j < known_window_boundaries.size(); j++) { - // We group by the precursor mz (center of the window) since this - // should be present - // for ion mobility, since the center value is not present in the raw data (it is computed) we use the imLower and upper bounds - if ((std::fabs(center - known_window_boundaries[j].center) < 1e-6) && (std::fabs(lowerIm - known_window_boundaries[j].imLower) < 1e-6) && (std::fabs(upperIm - known_window_boundaries[j].imUpper < 1e-6))) + // Check if the current scan is within the known window boundaries + if 
(known_window_boundaries[j].isEqual(boundary, TOLERANCE)) { found = true; swath_counter[j]++; @@ -359,23 +364,11 @@ namespace OpenMS { // we found a new SWATH scan swath_counter.push_back(1); - double lower = prec[0].getMZ() - prec[0].getIsolationWindowLowerOffset(); - double upper = prec[0].getMZ() + prec[0].getIsolationWindowUpperOffset(); - - OpenSwath::SwathMap boundary; - boundary.lower = lower; - boundary.upper = upper; - boundary.center = center; - - // set IM boundaries (if present) - boundary.imLower = lowerIm; - boundary.imUpper = upperIm; - known_window_boundaries.push_back(boundary); - OPENMS_LOG_DEBUG << "Adding Swath centered at " << center - << " m/z with an isolation window of " << lower << " to " << upper - << " m/z and start of " << lowerIm << " and IM end of " << upperIm << std::endl; + OPENMS_LOG_DEBUG << "Adding Swath centered at " << boundary.center + << " m/z with an isolation window of " << boundary.lower << " to " << boundary.upper + << " m/z and IM start of " << boundary.imLower << " and IM end of " << boundary.imUpper << std::endl; } } } diff --git a/src/openms_gui/include/OpenMS/VISUAL/APPLICATIONS/TOPPViewBase.h b/src/openms_gui/include/OpenMS/VISUAL/APPLICATIONS/TOPPViewBase.h index a2eb9b0f8bd..87b52985552 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/APPLICATIONS/TOPPViewBase.h +++ b/src/openms_gui/include/OpenMS/VISUAL/APPLICATIONS/TOPPViewBase.h @@ -187,10 +187,10 @@ namespace OpenMS @param data_type Type of the data @param show_as_1d Force dataset to be opened in 1D mode (even if it contains several spectra) @param show_options If the options dialog should be shown (otherwise the defaults are used) - @param as_new_window Open the layer in a new window within TOPPView + @param as_new_window Open the layer in a new window within TOPPView (ignored if 'window_id' is set) @param filename source file name (if the data came from a file) @param caption Sets the layer name and window caption of the data. If unset the file name is used. 
If set, the file is not monitored for changes. - @param window_id in which window the file is opened if opened as a new layer (0 or default equals current + @param window_id in which window the file is opened if opened as a new layer (0 will open a new window). @param spectrum_id determines the spectrum to show in 1D view. */ void addData(const FeatureMapSharedPtrType& feature_map, diff --git a/src/openms_gui/source/VISUAL/APPLICATIONS/MISC/QApplicationTOPP.cpp b/src/openms_gui/source/VISUAL/APPLICATIONS/MISC/QApplicationTOPP.cpp index 61f2cf18e27..e344971df2b 100644 --- a/src/openms_gui/source/VISUAL/APPLICATIONS/MISC/QApplicationTOPP.cpp +++ b/src/openms_gui/source/VISUAL/APPLICATIONS/MISC/QApplicationTOPP.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -116,24 +117,23 @@ namespace OpenMS // text QString text = QString("
" - "%1
" - "
" - "Version %2 %3" - "
" - "OpenMS and TOPP is free software available under the
" - "BSD 3-Clause License (BSD-new)
" - "
" - "
" - "
" - "
" - "
" - "Any published work based on TOPP and OpenMS shall cite these papers:
" - "Roest, Sachsenberg, Aiche, Bielow, Weisser et al., Nat Methods (2016), 13(9):741-748
" - "Kohlbacher et al., Bioinformatics (2007), 23:e191-e197
") + "%1
" + "
" + "Version %2 %3" + "
" + "OpenMS and TOPP is free software available under the
" + "BSD 3-Clause License (BSD-new)
" + "
" + "
" + "
" + "
" + "
" + "Any published work based on TOPP and OpenMS shall cite:
%4") .arg(toolname) .arg(VersionInfo::getVersion().toQString()) .arg( // if we have a revision, embed it also into the shown version number - VersionInfo::getRevision().empty() ? "" : QString(" (") + VersionInfo::getRevision().toQString() + ")"); + VersionInfo::getRevision().empty() ? "" : QString(" (") + VersionInfo::getRevision().toQString() + ")") + .arg((TOPPBase::cite_openms.title + "
" + TOPPBase::cite_openms.when_where + "
doi:" + TOPPBase::cite_openms.doi).c_str()); label = new QLabel(text, dlg); diff --git a/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPViewBase.cpp b/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPViewBase.cpp index 25db38312af..f47d5e1dd56 100644 --- a/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPViewBase.cpp +++ b/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPViewBase.cpp @@ -742,7 +742,7 @@ namespace OpenMS glock.unlock(); if (!annotate_path.empty()) - { + { // this opens a new window with raw data + annotation; we want the actual idXML data on top auto load_res = addDataFile(annotate_path, false, false); if (load_res == LOAD_RESULT::OK) { @@ -759,6 +759,7 @@ namespace OpenMS log_->appendNewHeader(LogWindow::LogState::NOTICE, "Error", "Annotation failed."); } } + window_id = getActivePlotWidget()->getWindowId(); // add ids on top of raw data } } diff --git a/src/openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/SwathMap.h b/src/openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/SwathMap.h index 16fb65322f9..2fc441408e4 100644 --- a/src/openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/SwathMap.h +++ b/src/openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/SwathMap.h @@ -9,6 +9,7 @@ #pragma once #include +#include namespace OpenSwath { @@ -39,7 +40,10 @@ namespace OpenSwath : lower(mz_start), upper(mz_end), center(mz_center), + imLower(-1), + imUpper(-1), ms1(is_ms1) + {} @@ -47,11 +51,20 @@ namespace OpenSwath : lower(mz_start), upper(mz_end), center(mz_center), - imLower(imLower), - imUpper(imUpper), + imLower(imLower), + imUpper(imUpper), ms1(is_ms1) {} + bool isEqual(const SwathMap& other, double tolerance = 1e-6) const + { + return (std::fabs(lower - other.lower) < tolerance) && + (std::fabs(upper - other.upper) < tolerance) && + (std::fabs(center - other.center) < tolerance) && + (std::fabs(imLower - other.imLower) < tolerance) && + (std::fabs(imUpper - other.imUpper) < tolerance) && + (ms1 == other.ms1); + } }; diff --git 
a/src/pyOpenMS/pyopenms-docs b/src/pyOpenMS/pyopenms-docs index 7628c8897d2..63fdab2a9e9 160000 --- a/src/pyOpenMS/pyopenms-docs +++ b/src/pyOpenMS/pyopenms-docs @@ -1 +1 @@ -Subproject commit 7628c8897d282c1574b4a1fe8cefb832ecff4cab +Subproject commit 63fdab2a9e9c765fc198bb70ef9f1f044711844e diff --git a/src/tests/class_tests/openms/executables.cmake b/src/tests/class_tests/openms/executables.cmake index 4d65d14c83b..ee14ae722f4 100644 --- a/src/tests/class_tests/openms/executables.cmake +++ b/src/tests/class_tests/openms/executables.cmake @@ -501,6 +501,7 @@ set(analysis_executables_list MetaboliteSpectralMatching_test ModifiedPeptideGenerator_test NeedlemanWunsch_test + NeighborSeq_test PeptideIndexing_test PeptideAndProteinQuant_test PeptideProteinResolution_test diff --git a/src/tests/class_tests/openms/source/NeighborSeq_test.cpp b/src/tests/class_tests/openms/source/NeighborSeq_test.cpp new file mode 100644 index 00000000000..9a32d9c53bf --- /dev/null +++ b/src/tests/class_tests/openms/source/NeighborSeq_test.cpp @@ -0,0 +1,212 @@ +// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Chris Bielow, Philipp Wang $ +// $Authors: Chris Bielow, Philipp Wang $ +// -------------------------------------------------------------------------- +#include +#include +#include +#include +#include + + +using namespace OpenMS; +using namespace std; + +START_TEST(NeighborSeq, "$Id$") + +// NS()=delete; + + +// Test section for the generateSpectrum function +// The spectra were generated via TOPPView and contained b-and y-ion +START_SECTION(MSSpectrum generateSpectrum(const String& peptide_sequence)) +{ + NeighborSeq ns({AASequence::fromString("TEST")}); + MSSpectrum spec_1 = ns.generateSpectrum(AASequence::fromString("PEPT")); + MSSpectrum spec_2 = ns.generateSpectrum(AASequence::fromString("AR")); + 
MSSpectrum spec_3 = ns.generateSpectrum(AASequence::fromString("VGLPINQR")); + + // Test "PEPT" + TEST_REAL_SIMILAR(spec_1[0].getMZ(), 98.0600); + TEST_REAL_SIMILAR(spec_1[1].getMZ(), 120.0655); + TEST_REAL_SIMILAR(spec_1[2].getMZ(), 217.1182); + TEST_REAL_SIMILAR(spec_1[3].getMZ(), 227.1026); + TEST_REAL_SIMILAR(spec_1[4].getMZ(), 324.1553); + TEST_REAL_SIMILAR(spec_1[5].getMZ(), 346.1608); + + + // Test "AR" + TEST_REAL_SIMILAR(spec_2[0].getMZ(), 72.04439); + TEST_REAL_SIMILAR(spec_2[1].getMZ(), 175.1189); + + // Test "VGLPINQR" + TEST_REAL_SIMILAR(spec_3[0].getMZ(), 100.0756); + TEST_REAL_SIMILAR(spec_3[1].getMZ(), 157.0971); + TEST_REAL_SIMILAR(spec_3[2].getMZ(), 175.1189); + TEST_REAL_SIMILAR(spec_3[3].getMZ(), 270.1812); + TEST_REAL_SIMILAR(spec_3[4].getMZ(), 303.1775); + TEST_REAL_SIMILAR(spec_3[5].getMZ(), 367.2339); + TEST_REAL_SIMILAR(spec_3[6].getMZ(), 417.2204); + TEST_REAL_SIMILAR(spec_3[7].getMZ(), 480.3180); + TEST_REAL_SIMILAR(spec_3[8].getMZ(), 530.3045); + TEST_REAL_SIMILAR(spec_3[9].getMZ(), 594.3609); + TEST_REAL_SIMILAR(spec_3[10].getMZ(), 627.3578); + TEST_REAL_SIMILAR(spec_3[11].getMZ(), 722.4195); + TEST_REAL_SIMILAR(spec_3[12].getMZ(), 740.4413); + TEST_REAL_SIMILAR(spec_3[13].getMZ(), 797.4628); + +} +END_SECTION + +// Test section for the isNeighborSpectrum function +START_SECTION( + static bool isNeighborSpectrum(const MSSpectrum& spec1, const MSSpectrum& spec2, const double min_shared_ion_fraction, const double mz_bin_size)) +{ + MSSpectrum spec1({Peak1D(100.00, 1.0), + Peak1D(200.00, 1.0), + Peak1D(300.00, 1.0)}); + + MSSpectrum spec2({Peak1D(100.05, 1.0), + Peak1D(200.05, 1.0), + Peak1D(300.05, 1.0)}); + + MSSpectrum spec3({Peak1D(101.00, 1.0), + Peak1D(201.00, 1.0), + Peak1D(301.00, 1.0)}); + + MSSpectrum spec4({Peak1D(100.05, 1.0), + Peak1D(201.00, 1.0), + Peak1D(300.05, 1.0), + Peak1D(301.00, 1.0)}); + + double min_shared_ion_fraction = 0.5; + + NeighborSeq ns({AASequence::fromString("TEST")}); + + // bin interval is from [a,b[ + 
TEST_TRUE (ns.isNeighborSpectrum(spec1, spec2, min_shared_ion_fraction, 1.0)) + TEST_FALSE(ns.isNeighborSpectrum(spec1, spec3, min_shared_ion_fraction, 1.0)) + TEST_TRUE (ns.isNeighborSpectrum(spec1, spec4, min_shared_ion_fraction, 1.0)) + TEST_FALSE(ns.isNeighborSpectrum(spec2, spec3, min_shared_ion_fraction, 1.0)) + TEST_TRUE (ns.isNeighborSpectrum(spec2, spec4, min_shared_ion_fraction, 1.0)) + TEST_TRUE (ns.isNeighborSpectrum(spec3, spec4, min_shared_ion_fraction, 1.0)) + + TEST_FALSE(ns.isNeighborSpectrum(spec1, spec2, min_shared_ion_fraction, 0.05)) + TEST_FALSE(ns.isNeighborSpectrum(spec1, spec3, min_shared_ion_fraction, 0.05)) + TEST_FALSE(ns.isNeighborSpectrum(spec1, spec4, min_shared_ion_fraction, 0.05)) + TEST_FALSE(ns.isNeighborSpectrum(spec2, spec3, min_shared_ion_fraction, 0.05)) + TEST_TRUE(ns.isNeighborSpectrum(spec2, spec4, min_shared_ion_fraction, 0.05)) + TEST_TRUE(ns.isNeighborSpectrum(spec3, spec4, min_shared_ion_fraction, 0.05)) +} +END_SECTION + + +// Test section for the computeSharedIonCount function +START_SECTION(static int computeSharedIonCount(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& mz_bin_size)) +{ + MSSpectrum spec1({Peak1D(100.00, 1.0), + Peak1D(200.00, 1.0), + Peak1D(300.00, 1.0)}); + + MSSpectrum spec2({Peak1D(100.05, 1.0), + Peak1D(200.05, 1.0), + Peak1D(300.05, 1.0)}); + + MSSpectrum spec3({Peak1D(101.00, 1.0), + Peak1D(201.00, 1.0), + Peak1D(301.00, 1.0)}); + + MSSpectrum spec4({Peak1D(100.05, 1.0), + Peak1D(201.00, 1.0), + Peak1D(300.05, 1.0), + Peak1D(301.00, 1.0)}); + + NeighborSeq ns({AASequence::fromString("TEST")}); + + // bin interval is from [a,b[ + TEST_EQUAL(ns.computeSharedIonCount(spec1, spec2, 2.0), 3) + TEST_EQUAL(ns.computeSharedIonCount(spec1, spec3, 2.0), 3) + TEST_EQUAL(ns.computeSharedIonCount(spec1, spec4, 2.0), 3) + TEST_EQUAL(ns.computeSharedIonCount(spec2, spec3, 2.0), 3) + TEST_EQUAL(ns.computeSharedIonCount(spec2, spec4, 2.0), 3) + TEST_EQUAL(ns.computeSharedIonCount(spec3, spec4, 
2.0), 3) + + TEST_EQUAL(ns.computeSharedIonCount(spec1, spec2, 1.0), 3) + TEST_EQUAL(ns.computeSharedIonCount(spec1, spec3, 1.0), 0) + TEST_EQUAL(ns.computeSharedIonCount(spec1, spec4, 1.0), 2) + TEST_EQUAL(ns.computeSharedIonCount(spec2, spec3, 1.0), 0) + TEST_EQUAL(ns.computeSharedIonCount(spec2, spec4, 1.0), 2) + TEST_EQUAL(ns.computeSharedIonCount(spec3, spec4, 1.0), 2) + + TEST_EQUAL(ns.computeSharedIonCount(spec1, spec2, 0.1), 3) + TEST_EQUAL(ns.computeSharedIonCount(spec1, spec3, 0.1), 0) + TEST_EQUAL(ns.computeSharedIonCount(spec1, spec4, 0.1), 2) + TEST_EQUAL(ns.computeSharedIonCount(spec2, spec3, 0.1), 0) + TEST_EQUAL(ns.computeSharedIonCount(spec2, spec4, 0.1), 2) + TEST_EQUAL(ns.computeSharedIonCount(spec3, spec4, 0.1), 2) +} +END_SECTION + +START_SECTION(bool isNeighborPeptide(const AASequence& neighbor_candidate, + const double mass_tolerance_pc, + const bool mass_tolerance_pc_ppm, + const double min_shared_ion_fraction, + const double mz_bin_size)) +{ + const AASequence AA_VELQSK = AASequence::fromString("VELQSK"); + const AASequence AA_SVQELK = AASequence::fromString("SVQELK"); + const AASequence AA_TVDQLK = AASequence::fromString("TVDQLK"); + + const AASequence AA_VESQLK = AASequence::fromString("VESQLK"); + std::vector seqs = {AASequence::fromString("VELQSK"), + AASequence::fromString("SVQELK"), + AASequence::fromString("TVDQLK"), + AASequence::fromString("VGEFK")}; + NeighborSeq ns(std::move(seqs)); + // VELQSK has neighbor VESQLK // shares 6 ions + // SVQELK has neighbor VESQLK // shares 4 ions + // TVDQLK has neighbor VESQLK // shares 6 ions + // VGEFK has neighbor GLDFK + + const double pc_tolerance = 0.01; + const double mz_bin_size = 0.05; + TEST_TRUE(std::abs(AA_VELQSK.getMonoWeight() - AA_VESQLK.getMonoWeight()) < pc_tolerance) + TEST_EQUAL(ns.computeSharedIonCount(ns.generateSpectrum(AA_VELQSK), ns.generateSpectrum(AA_VESQLK), mz_bin_size), 6) + TEST_EQUAL(ns.computeSharedIonCount(ns.generateSpectrum(AA_SVQELK), 
ns.generateSpectrum(AA_VESQLK), mz_bin_size), 4) + TEST_EQUAL(ns.computeSharedIonCount(ns.generateSpectrum(AA_TVDQLK), ns.generateSpectrum(AA_VESQLK), mz_bin_size), 6) + + // test the overlap threshold: + const double shared_ion_fraction = 6 * 2.0 / ( ((AA_VESQLK.size() - 1) * 2 /*b and y*/) * 2); + TEST_FALSE(ns.isNeighborPeptide(AASequence::fromString("VESQLK"), pc_tolerance, false, shared_ion_fraction + 0.1, mz_bin_size)) + // VESQLK matches VELQSK and TVDQLK (but not SVQELK since the overlap is insufficient) + TEST_TRUE (ns.isNeighborPeptide(AASequence::fromString("VESQLK"), pc_tolerance, false, shared_ion_fraction - 0.1, mz_bin_size)) + + // GLDFK matches to VGEFK + TEST_TRUE(ns.isNeighborPeptide(AASequence::fromString("GLDFK"), pc_tolerance, false, 0.25, mz_bin_size)) + + auto stats = ns.getNeighborStats(); + TEST_EQUAL(stats.unfindable_peptides, 0) + TEST_EQUAL(stats.findable_no_neighbors, 1) + TEST_EQUAL(stats.findable_one_neighbor, 3) + TEST_EQUAL(stats.findable_multiple_neighbors, 0) + + // test VESQLK again, which is a neighbor for 3 ref peptides at threshold 0.25 + TEST_TRUE(ns.isNeighborPeptide(AASequence::fromString("VESQLK"), pc_tolerance, false, 0.25, mz_bin_size)) + auto stats2 = ns.getNeighborStats(); + TEST_EQUAL(stats2.unfindable_peptides, 0) + TEST_EQUAL(stats2.findable_no_neighbors, 0) + TEST_EQUAL(stats2.findable_one_neighbor, 2) + TEST_EQUAL(stats2.findable_multiple_neighbors, 2) +} +END_SECTION + +START_SECTION(NeighborStats getNeighborStats() const) +{ + NOT_TESTABLE // tested above +} +END_SECTION + +END_TEST diff --git a/src/tests/class_tests/openms/source/VersionInfo_test.cpp b/src/tests/class_tests/openms/source/VersionInfo_test.cpp index 98a9d46b8ab..0cc76e60705 100644 --- a/src/tests/class_tests/openms/source/VersionInfo_test.cpp +++ b/src/tests/class_tests/openms/source/VersionInfo_test.cpp @@ -42,7 +42,7 @@ START_SECTION((static VersionDetails getVersionStruct())) { VersionInfo::VersionDetails detail; detail.version_major = 3; - 
detail.version_minor = 2; + detail.version_minor = 3; detail.version_patch = 0; TEST_EQUAL(VersionInfo::getVersionStruct().version_major, detail.version_major); TEST_EQUAL(VersionInfo::getVersionStruct().version_minor, detail.version_minor); diff --git a/src/tests/class_tests/openswathalgo/CMakeLists.txt b/src/tests/class_tests/openswathalgo/CMakeLists.txt index 6aa81cd80c2..423b8db2008 100644 --- a/src/tests/class_tests/openswathalgo/CMakeLists.txt +++ b/src/tests/class_tests/openswathalgo/CMakeLists.txt @@ -46,6 +46,7 @@ set(openswath_algo_tests Datastructures_test TestConvert DiaHelpers_test + SwathMap_test ) #------------------------------------------------------------------------------ diff --git a/src/tests/class_tests/openswathalgo/SwathMap_test.cpp b/src/tests/class_tests/openswathalgo/SwathMap_test.cpp new file mode 100644 index 00000000000..12a708e2dae --- /dev/null +++ b/src/tests/class_tests/openswathalgo/SwathMap_test.cpp @@ -0,0 +1,61 @@ +// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Joshua Charkow$ +// $Authors: Joshua Charkow$ +// -------------------------------------------------------------------------- + +#include "OpenMS/OPENSWATHALGO/DATAACCESS/SwathMap.h" +#include + +using namespace OpenMS; +using namespace std; + +/////////////////////////// + +START_TEST(SwathMap, "$Id$") + +///////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////// + +START_SECTION(testIsEqual) +{ + // map 1 and map 2 are equal + OpenSwath::SwathMap map1; + OpenSwath::SwathMap map2; + TEST_EQUAL(map1.isEqual(map2), true); + + // map 3 and map 4 are equal + // map 5 is different because of ms1 + // map 6,7 is different because of mz bounds + OpenSwath::SwathMap map3(1.0, 2.0, 1.5, false); + OpenSwath::SwathMap map4(1.0, 2.0, 
1.5, false); + OpenSwath::SwathMap map5(1.0, 2.0, 1.5, true); + TEST_EQUAL(map3.isEqual(map4), true); + TEST_EQUAL(map3.isEqual(map5), false); + + // map 6,7 are different from map 3 different because of mz bounds + OpenSwath::SwathMap map6(1.0, 3.0, 2.0, false); + OpenSwath::SwathMap map7(2.0, 3.0, 2.5, false); + + // map 8 should be the same as map 3 + OpenSwath::SwathMap map8(1.0, 2.0, 1.5, -1, -1, false); + TEST_EQUAL(map3.isEqual(map8), true); + + // map 9, 10 are equal + OpenSwath::SwathMap map9(1.0, 2.0, 1.5, 1.0, 1.1, false); + OpenSwath::SwathMap map10(1.0, 2.0, 1.5, 1.0, 1.1, false); + TEST_EQUAL(map9.isEqual(map10), true); + + // map 11/12 is different from map 9 because of im bounds + OpenSwath::SwathMap map11(1.0, 2.0, 1.5, 1.3, 1.4, false); + OpenSwath::SwathMap map12(1.0, 2.0, 1.5, 1.0, 1.2, false); + TEST_EQUAL(map9.isEqual(map11), false); + TEST_EQUAL(map9.isEqual(map12), false); +} +END_SECTION + +///////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////// +END_TEST \ No newline at end of file diff --git a/src/tests/topp/CMakeLists.txt b/src/tests/topp/CMakeLists.txt index 060b515271a..05909362e78 100644 --- a/src/tests/topp/CMakeLists.txt +++ b/src/tests/topp/CMakeLists.txt @@ -2696,11 +2696,20 @@ set_tests_properties("TOPP_DecoyDatabase_3_out" PROPERTIES DEPENDS "TOPP_DecoyDa add_test("TOPP_DecoyDatabase_4" ${TOPP_BIN_PATH}/DecoyDatabase -test -type RNA -in ${DATA_DIR_TOPP}/DecoyDatabase_4.fasta -out DecoyDatabase_4.tmp.fasta -decoy_string "DECOY_SEQ_" -decoy_string_position "prefix" -method reverse -seed 42 ) add_test("TOPP_DecoyDatabase_4_out" ${DIFF} -in1 DecoyDatabase_4.tmp.fasta -in2 ${DATA_DIR_TOPP}/DecoyDatabase_4_out.fasta ) set_tests_properties("TOPP_DecoyDatabase_4_out" PROPERTIES DEPENDS "TOPP_DecoyDatabase_4") -# tests with already decoyed input data (should fail) +# ... 
tests with already decoyed input data (should fail) add_test("TOPP_DecoyDatabase_5" ${TOPP_BIN_PATH}/DecoyDatabase -test -in ${DATA_DIR_TOPP}/DecoyDatabase_5.fasta -out DecoyDatabase_5.tmp.fasta) set_tests_properties("TOPP_DecoyDatabase_5" PROPERTIES WILL_FAIL 1) add_test("TOPP_DecoyDatabase_6" ${TOPP_BIN_PATH}/DecoyDatabase -test -in ${DATA_DIR_TOPP}/DecoyDatabase_6.fasta -out DecoyDatabase_6.tmp.fasta) set_tests_properties("TOPP_DecoyDatabase_6" PROPERTIES WILL_FAIL 1) +# ... with subset neighbor search +add_test("TOPP_DecoyDatabase_7" ${TOPP_BIN_PATH}/DecoyDatabase -test -in ${DATA_DIR_TOPP}/DecoyDatabase_7.fasta -out DecoyDatabase_7_all.tmp.fasta -NeighborSearch:in_relevant_proteins ${DATA_DIR_TOPP}/DecoyDatabase_7_relevant.fasta -NeighborSearch:out_neighbor DecoyDatabase_7_neighbors.tmp.fasta -NeighborSearch:out_relevant DecoyDatabase_7_relevant.tmp.fasta) +add_test("TOPP_DecoyDatabase_7_out1" ${DIFF} -in1 DecoyDatabase_7_all.tmp.fasta -in2 ${DATA_DIR_TOPP}/DecoyDatabase_7_out_all.fasta ) +add_test("TOPP_DecoyDatabase_7_out2" ${DIFF} -in1 DecoyDatabase_7_neighbors.tmp.fasta -in2 ${DATA_DIR_TOPP}/DecoyDatabase_7_out_neighbors.fasta ) +add_test("TOPP_DecoyDatabase_7_out3" ${DIFF} -in1 DecoyDatabase_7_relevant.tmp.fasta -in2 ${DATA_DIR_TOPP}/DecoyDatabase_7_out_relevant.fasta ) +set_tests_properties("TOPP_DecoyDatabase_7_out1" PROPERTIES DEPENDS "TOPP_DecoyDatabase_7") +set_tests_properties("TOPP_DecoyDatabase_7_out2" PROPERTIES DEPENDS "TOPP_DecoyDatabase_7") +set_tests_properties("TOPP_DecoyDatabase_7_out3" PROPERTIES DEPENDS "TOPP_DecoyDatabase_7") + # SimpleSearchEngine: add_test("TOPP_SimpleSearchEngine_1" ${TOPP_BIN_PATH}/SimpleSearchEngine -test diff --git a/src/tests/topp/Decharger_input.ini b/src/tests/topp/Decharger_input.ini index 50dc19df525..a15f16b4885 100644 --- a/src/tests/topp/Decharger_input.ini +++ b/src/tests/topp/Decharger_input.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/DecoyDatabase_7.fasta b/src/tests/topp/DecoyDatabase_7.fasta 
new file mode 100644 index 00000000000..ee0a9232156 --- /dev/null +++ b/src/tests/topp/DecoyDatabase_7.fasta @@ -0,0 +1,29 @@ +>sp|P06750|AGGL_RICCO Agglutinin OS=Ricinus communis PE=1 SV=1 +MYAVATWLCFGSTSGWSFTLEDNNIFPKQYPIINFTTADATVESYTNFIRAVRSHLTTGA +DVRHEIPVLPNRVGLPISQRFILVELSNHAELSVTLALDVTNAYVVGCRAGNSAYFFHPD +NQEDAEAITHLFTDVQNSFTFAFGGNYDRLEQLGGLRENIELGTGPLEDAISALYYYSTC +GTQIPTLARSFMVCIQMISEAARFQYIEGEMRTRIRYNRRSAPDPSVITLENSWGRLSTA +IQESNQGAFASPIQLQRRNGSKFNVYDVSILIPIIALMVYRCAPPPSSQFSLLIRPVVPN +FNADVCMDPEPIVRIVGRNGLCVDVTGEEFFDGNPIQLWPCKSNTDWNQLWTLRKDSTIR +SNGKCLTISKSSPRQQVVIYNCSTATVGATRWQIWDNRTIINPRSGLVLAATSGNSGTKL +TVQTNIYAVSQGWLPTNNTQPFVTTIVGLYGMCLQANSGKVWLEDCTSEKAEQQWALYAD +GSIRPQQNRDNCLTTDANIKGTVVKILSCGPASSGQRWMFKNDGTILNLYNGLVLDVRRS +DPSLKQIIVHPFHGNLNQIWLPLF +>sp|P77985|CYSE_STAXY Serine acetyltransferase OS=Staphylococcus xylosus GN=cysE PE=3 SV=1 +MFKFLKRIKDDVNMVFEQDPAARTTLEVITSYAGVHAVWSHLIAHELYKKKKYVLARLIS +QVTRFFTGIEIHPGAQIGRRLFIDHGMGVVIGETCRIGDNVTIYQGVTLGGTGKERGKRH +PDIGDNVLIAAGAKVLGNITINANVNIGANSVVLNSVPSYSTVVGIPGHIVKQDGRRIGK +TFDHRNLPDPIYEQLKELEKQLEKTRNGEIQDDYII +>sp|B2K5N5|EFG_YERPB Elongation factor G OS=Yersinia pseudotuberculosis serotype IB (strain PB1/+) GN=fusA PE=3 SV=1 (as a spurious hit) +MARKTPIERYRNIGISAHIDAGKTTTTERILFYTGVNHKIGEVHDGAATMDWMEQEQERG +ITITSAATTCFWSGMAKQFEPHHVNIIDTPGHVDFTIEVERSMRVLDGAVMVYCAVGGVQ +PQSETVWRQANKYKVPRIAFVNKMDRMGANFLRVVGQLKSRLGANPVPLQLAIGAEEKFT +GIIDLVKMKAINWNEADQGVTFEYEEIPADMAELAAEWHQNLVESAAEASDELMDKYLGG +EELTEEEIKKALRQRVLKSEIILVTCGSAFKNKGVQAMLDAVIEYLPAPTDVESINGILD +DGKDTPAVRHSDDKEPFSALAFKIATDPFVGNLTFFRVYSGIVNSGDTVLNSVKSQRERL +GRIVQMHANKREEIKEVHAGDIAAAIGLKDVTTGDTLCDPNNPIILERMEFPEPVISVAV +EPKTKADQEKMGMALGRLAKEDPSFRVWTDEESGQTIIAGMGELHLDILVDRMRREFNVE +ANVGKPQVAYRETIRETVKDVEGKHAKQSGGRGQYGHVVIDMSPLPPGGVGYEFVNEIVG +GSIPKEFIPAVDKGIQEQLKSGPLAGYPVVDVKVRLHYGSYHDVDSSELAFKLAGSIAFK +EGFKRAKPVLLEPIMKVEVETPEDYMGDVMGDLNRRRGIIEGMEDTATGKTVRVKVPLSE +MFGYATDLRSQTQGRASYSMEFLEYAEAPSNVAKAVIEARGK diff --git 
a/src/tests/topp/DecoyDatabase_7_out_all.fasta b/src/tests/topp/DecoyDatabase_7_out_all.fasta new file mode 100644 index 00000000000..245d56b5fae --- /dev/null +++ b/src/tests/topp/DecoyDatabase_7_out_all.fasta @@ -0,0 +1,40 @@ +>tr|B9T8T0|B9T8T0_RICCO Ribosome-inactivating protein OS=Ricinus communis OX=3988 GN=RCOM_2159910 PE=3 SV=1 +MKPGGNTIVIWMYAVATWLCFGSTSGWSFTLEDNNIFPKQYPIINFTTAGATVQSYTNFIRAVRGRLTTGADVRHEIPVL +PNRVGLPINQRFILVELSNHAELSVTLALDVTNAYVVGYRAGNSAYFFHPDNQEDAEAITHLFTDVQNRYTFAFGGNYDR +LEQLAGNLRENIELGNGPLEEAISALYYYSTGGTQLPTLARSFIICIQMISEAARFQYIEGEMRTRIRYNRRSAPDPSVI +TLENSWGRLSTAIQESNQGAFASPIQLQRRNGSKFSVYDVSILIPIIALMVYRCAPPPSSQFSLLIRPVVPNFNADVCMD +PEPIVRIVGRNGLCVDVRDGRFHNGNAIQLWPCKSNTDANQLWTLKRDNTIRSNGKCLTTYGYSPGVYVMIYDCNTAATD +ATRWQIWDNGTIINPRSSLVLAATSGNSGTTLTVQTNIYAVSQGWLPTNNTQPFVTTIVGLYGLCLQANSGQVWIEDCSS +EKAEQQWALYADGSIRPQQNRDNCLTSDSNIRETVVKILSCGPASSGQRWMFKNDGTILNLYSGLVLDVRASDPSLKQII +LYPLHGDPNQIWLPLF +>DECOY_tr|B9T8T0|B9T8T0_RICCO Ribosome-inactivating protein OS=Ricinus communis OX=3988 GN=RCOM_2159910 PE=3 SV=1 +MPFINNDELTFSWGSTSGFCLWTAVAYMWIVITNGGPKKQIFNTYSQVTAGATTFNIIPYRAVRGRLVDAGTTRHNPLVP +IERVQNIPLGRFYGVVYANTVDLALTVSLEAHNSLEVLIRANQVDTFLHTIAEADEQNDPHFFYASNGRYDYNGGFAFTR +LLNGALQEREALTPLQTGGTSYYYLASIAEELPGNGLEINRSAAESIMQICIIFRFMEGEIYQRTRIRYNRRSGWSNELT +IVSPDPARLQLQIPSAFAGQNSEQIATSRRNSGKFYVMLAIIPILISVDYVSRCVIPEPDMCVDANFNPVVPRILLSFQS +SPPPARIGVRNVDVCLGRDGRFCPWLQIANGNHKSLTWLQNADTNKRDITNRSGNKCTADTAATNCDYIMVYVGPSYGYT +TLRWPNIITGNDWIQRSESSCDEIWVQGSNAQLCLGYLGVITTVFPQTNNTPLWGQSVAYINTQVTLTTGSNGSTAALVL +SKANQQPRISGDAYLAWQQERDINSDSTLCNREVVTKIQGSSAPGCSLRWFMKNVDLVLGSYLNLITGDRALSPDSKQLP +LWIQNPDGHLPYLIIF +>sp|Q6GBV9|CYSE_STAAS Serine acetyltransferase OS=Staphylococcus aureus (strain MSSA476) GN=cysE PE=3 SV=1 +MILLKRMRDDIKMVFEQDPAARSTLEVITTYAGLHAVWSHLIAHKLYNQKKYVAARAISQISRFFTGIEIHPGAKIGKRL +FIDHGMGVVIGETCTIGDNVTIYQGVTLGGTGKERGKRHPDIGDNVLIAAGAKVLGNIKINSNVNIGANSVVLQSVPSYS +TVVGIPGHIVKQDGVRVGKTFDHRHLPDPIYEQIKHLERQLEKTRNGEIQDDYII +>DECOY_sp|Q6GBV9|CYSE_STAAS 
Serine acetyltransferase OS=Staphylococcus aureus (strain MSSA476) GN=cysE PE=3 SV=1 +MLLIKRMRDIDKMAAPDQEFVRSHAILHSWVAHLGAYTTIVELTKLQNYKKYAAVRASIQSIRFAGPHIEIGTFKIGKRL +GTGGLTVGQYITVNDGITCTEGIVVGMGHDIFKERGKRHAGAAILVNDGIDPKVINGLKIVIHGPIGVVTSYSPVSQLVV +SNAGINVNSNKQVGDRVGKTHDFRHIQEYIPDPLKHELRQELKTRNIYDDQIEGI +>neighbor_sp|P06750|AGGL_RICCO Agglutinin OS=Ricinus communis PE=1 SV=1 +HEIPVLPNRFQYIEGEMRSAPDPSVITLENSWGRLSTAIQESNQGAFASPIQLQRCAPPPSSQFSLLIRPVVPNFNADVC +MDPEPIVRAEQQWALYADGSIRPQQNRILSCGPASSGQR +>DECOY_neighbor_sp|P06750|AGGL_RICCO Agglutinin OS=Ricinus communis PE=1 SV=1 +HNPLVPIERFMEGEIYQRSGWSNELTIVSPDPARLQLQIPSAFAGQNSEQIATSRCVIPEPDMCVDANFNPVVPRILLSF +QSSPPPARANQQPRISGDAYLAWQQERIQGSSAPGCSLR +>neighbor_sp|P77985|CYSE_STAXY Serine acetyltransferase OS=Staphylococcus xylosus GN=cysE PE=3 SV=1 +HPDIGDNVLIAAGAKTFDHRNGEIQDDYII +>DECOY_neighbor_sp|P77985|CYSE_STAXY Serine acetyltransferase OS=Staphylococcus xylosus GN=cysE PE=3 SV=1 +HAGAAILVNDGIDPKTHDFRNIYDDQIEGI +>neighbor_sp|B2K5N5|EFG_YERPB Elongation factor G OS=Yersinia pseudotuberculosis serotype IB (strain PB1/+) GN=fusA PE=3 SV=1 (as a spurious hit) +VVGQLK +>DECOY_neighbor_sp|B2K5N5|EFG_YERPB Elongation factor G OS=Yersinia pseudotuberculosis serotype IB (strain PB1/+) GN=fusA PE=3 SV=1 (as a spurious hit) +VLQGVK diff --git a/src/tests/topp/DecoyDatabase_7_out_neighbors.fasta b/src/tests/topp/DecoyDatabase_7_out_neighbors.fasta new file mode 100644 index 00000000000..4ba7a9a160b --- /dev/null +++ b/src/tests/topp/DecoyDatabase_7_out_neighbors.fasta @@ -0,0 +1,7 @@ +>neighbor_sp|P06750|AGGL_RICCO Agglutinin OS=Ricinus communis PE=1 SV=1 +HEIPVLPNRFQYIEGEMRSAPDPSVITLENSWGRLSTAIQESNQGAFASPIQLQRCAPPPSSQFSLLIRPVVPNFNADVC +MDPEPIVRAEQQWALYADGSIRPQQNRILSCGPASSGQR +>neighbor_sp|P77985|CYSE_STAXY Serine acetyltransferase OS=Staphylococcus xylosus GN=cysE PE=3 SV=1 +HPDIGDNVLIAAGAKTFDHRNGEIQDDYII +>neighbor_sp|B2K5N5|EFG_YERPB Elongation factor G OS=Yersinia pseudotuberculosis serotype IB (strain PB1/+) GN=fusA PE=3 
SV=1 (as a spurious hit) +VVGQLK diff --git a/src/tests/topp/DecoyDatabase_7_out_relevant.fasta b/src/tests/topp/DecoyDatabase_7_out_relevant.fasta new file mode 100644 index 00000000000..a239003b48a --- /dev/null +++ b/src/tests/topp/DecoyDatabase_7_out_relevant.fasta @@ -0,0 +1,26 @@ +>tr|B9T8T0|B9T8T0_RICCO Ribosome-inactivating protein OS=Ricinus communis OX=3988 GN=RCOM_2159910 PE=3 SV=1 +MKPGGNTIVIWMYAVATWLCFGSTSGWSFTLEDNNIFPKQYPIINFTTAGATVQSYTNFIRAVRGRLTTGADVRHEIPVL +PNRVGLPINQRFILVELSNHAELSVTLALDVTNAYVVGYRAGNSAYFFHPDNQEDAEAITHLFTDVQNRYTFAFGGNYDR +LEQLAGNLRENIELGNGPLEEAISALYYYSTGGTQLPTLARSFIICIQMISEAARFQYIEGEMRTRIRYNRRSAPDPSVI +TLENSWGRLSTAIQESNQGAFASPIQLQRRNGSKFSVYDVSILIPIIALMVYRCAPPPSSQFSLLIRPVVPNFNADVCMD +PEPIVRIVGRNGLCVDVRDGRFHNGNAIQLWPCKSNTDANQLWTLKRDNTIRSNGKCLTTYGYSPGVYVMIYDCNTAATD +ATRWQIWDNGTIINPRSSLVLAATSGNSGTTLTVQTNIYAVSQGWLPTNNTQPFVTTIVGLYGLCLQANSGQVWIEDCSS +EKAEQQWALYADGSIRPQQNRDNCLTSDSNIRETVVKILSCGPASSGQRWMFKNDGTILNLYSGLVLDVRASDPSLKQII +LYPLHGDPNQIWLPLF +>DECOY_tr|B9T8T0|B9T8T0_RICCO Ribosome-inactivating protein OS=Ricinus communis OX=3988 GN=RCOM_2159910 PE=3 SV=1 +MPFINNDELTFSWGSTSGFCLWTAVAYMWIVITNGGPKKQIFNTYSQVTAGATTFNIIPYRAVRGRLVDAGTTRHNPLVP +IERVQNIPLGRFYGVVYANTVDLALTVSLEAHNSLEVLIRANQVDTFLHTIAEADEQNDPHFFYASNGRYDYNGGFAFTR +LLNGALQEREALTPLQTGGTSYYYLASIAEELPGNGLEINRSAAESIMQICIIFRFMEGEIYQRTRIRYNRRSGWSNELT +IVSPDPARLQLQIPSAFAGQNSEQIATSRRNSGKFYVMLAIIPILISVDYVSRCVIPEPDMCVDANFNPVVPRILLSFQS +SPPPARIGVRNVDVCLGRDGRFCPWLQIANGNHKSLTWLQNADTNKRDITNRSGNKCTADTAATNCDYIMVYVGPSYGYT +TLRWPNIITGNDWIQRSESSCDEIWVQGSNAQLCLGYLGVITTVFPQTNNTPLWGQSVAYINTQVTLTTGSNGSTAALVL +SKANQQPRISGDAYLAWQQERDINSDSTLCNREVVTKIQGSSAPGCSLRWFMKNVDLVLGSYLNLITGDRALSPDSKQLP +LWIQNPDGHLPYLIIF +>sp|Q6GBV9|CYSE_STAAS Serine acetyltransferase OS=Staphylococcus aureus (strain MSSA476) GN=cysE PE=3 SV=1 +MILLKRMRDDIKMVFEQDPAARSTLEVITTYAGLHAVWSHLIAHKLYNQKKYVAARAISQISRFFTGIEIHPGAKIGKRL +FIDHGMGVVIGETCTIGDNVTIYQGVTLGGTGKERGKRHPDIGDNVLIAAGAKVLGNIKINSNVNIGANSVVLQSVPSYS 
+TVVGIPGHIVKQDGVRVGKTFDHRHLPDPIYEQIKHLERQLEKTRNGEIQDDYII +>DECOY_sp|Q6GBV9|CYSE_STAAS Serine acetyltransferase OS=Staphylococcus aureus (strain MSSA476) GN=cysE PE=3 SV=1 +MLLIKRMRDIDKMAAPDQEFVRSHAILHSWVAHLGAYTTIVELTKLQNYKKYAAVRASIQSIRFAGPHIEIGTFKIGKRL +GTGGLTVGQYITVNDGITCTEGIVVGMGHDIFKERGKRHAGAAILVNDGIDPKVINGLKIVIHGPIGVVTSYSPVSQLVV +SNAGINVNSNKQVGDRVGKTHDFRHIQEYIPDPLKHELRQELKTRNIYDDQIEGI diff --git a/src/tests/topp/DecoyDatabase_7_relevant.fasta b/src/tests/topp/DecoyDatabase_7_relevant.fasta new file mode 100644 index 00000000000..2d1f1783562 --- /dev/null +++ b/src/tests/topp/DecoyDatabase_7_relevant.fasta @@ -0,0 +1,19 @@ +>tr|B9T8T0|B9T8T0_RICCO Ribosome-inactivating protein OS=Ricinus communis OX=3988 GN=RCOM_2159910 PE=3 SV=1 +MKPGGNTIVIWMYAVATWLCFGSTSGWSFTLEDNNIFPKQYPIINFTTAGATVQSYTNFI +RAVRGRLTTGADVRHEIPVLPNRVGLPINQRFILVELSNHAELSVTLALDVTNAYVVGYR +AGNSAYFFHPDNQEDAEAITHLFTDVQNRYTFAFGGNYDRLEQLAGNLRENIELGNGPLE +EAISALYYYSTGGTQLPTLARSFIICIQMISEAARFQYIEGEMRTRIRYNRRSAPDPSVI +TLENSWGRLSTAIQESNQGAFASPIQLQRRNGSKFSVYDVSILIPIIALMVYRCAPPPSS +QFSLLIRPVVPNFNADVCMDPEPIVRIVGRNGLCVDVRDGRFHNGNAIQLWPCKSNTDAN +QLWTLKRDNTIRSNGKCLTTYGYSPGVYVMIYDCNTAATDATRWQIWDNGTIINPRSSLV +LAATSGNSGTTLTVQTNIYAVSQGWLPTNNTQPFVTTIVGLYGLCLQANSGQVWIEDCSS +EKAEQQWALYADGSIRPQQNRDNCLTSDSNIRETVVKILSCGPASSGQRWMFKNDGTILN +LYSGLVLDVRASDPSLKQIILYPLHGDPNQIWLPLF + + + +>sp|Q6GBV9|CYSE_STAAS Serine acetyltransferase OS=Staphylococcus aureus (strain MSSA476) GN=cysE PE=3 SV=1 +MILLKRMRDDIKMVFEQDPAARSTLEVITTYAGLHAVWSHLIAHKLYNQKKYVAARAISQ +ISRFFTGIEIHPGAKIGKRLFIDHGMGVVIGETCTIGDNVTIYQGVTLGGTGKERGKRHP +DIGDNVLIAAGAKVLGNIKINSNVNIGANSVVLQSVPSYSTVVGIPGHIVKQDGVRVGKT +FDHRHLPDPIYEQIKHLERQLEKTRNGEIQDDYII diff --git a/src/tests/topp/FeatureFinderCentroided_1_parameters.ini b/src/tests/topp/FeatureFinderCentroided_1_parameters.ini index a37fd23a617..9b9784779e4 100644 --- a/src/tests/topp/FeatureFinderCentroided_1_parameters.ini +++ b/src/tests/topp/FeatureFinderCentroided_1_parameters.ini @@ -1,7 +1,7 @@ - + diff --git 
a/src/tests/topp/FeatureFinderMetabo.ini b/src/tests/topp/FeatureFinderMetabo.ini index 7241ccde487..426d1bf6278 100644 --- a/src/tests/topp/FeatureFinderMetabo.ini +++ b/src/tests/topp/FeatureFinderMetabo.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureFinderMetabo_2_noEPD.ini b/src/tests/topp/FeatureFinderMetabo_2_noEPD.ini index 230720ac11e..96076c294db 100644 --- a/src/tests/topp/FeatureFinderMetabo_2_noEPD.ini +++ b/src/tests/topp/FeatureFinderMetabo_2_noEPD.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureFinderMetabo_3.ini b/src/tests/topp/FeatureFinderMetabo_3.ini index 49a1d8610c6..6195dabead8 100644 --- a/src/tests/topp/FeatureFinderMetabo_3.ini +++ b/src/tests/topp/FeatureFinderMetabo_3.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureFinderMetabo_4.ini b/src/tests/topp/FeatureFinderMetabo_4.ini index bb01bea7ded..eb12b2d5016 100644 --- a/src/tests/topp/FeatureFinderMetabo_4.ini +++ b/src/tests/topp/FeatureFinderMetabo_4.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureFinderMultiplex_10_parameters.ini b/src/tests/topp/FeatureFinderMultiplex_10_parameters.ini index ab343f2f1ee..5a4ce4aa197 100644 --- a/src/tests/topp/FeatureFinderMultiplex_10_parameters.ini +++ b/src/tests/topp/FeatureFinderMultiplex_10_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureFinderMultiplex_11_parameters.ini b/src/tests/topp/FeatureFinderMultiplex_11_parameters.ini index e545814a966..c146073e4c1 100644 --- a/src/tests/topp/FeatureFinderMultiplex_11_parameters.ini +++ b/src/tests/topp/FeatureFinderMultiplex_11_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureFinderMultiplex_1_parameters.ini b/src/tests/topp/FeatureFinderMultiplex_1_parameters.ini index cc93fe0712c..04aa83ead49 100644 --- a/src/tests/topp/FeatureFinderMultiplex_1_parameters.ini +++ b/src/tests/topp/FeatureFinderMultiplex_1_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureFinderMultiplex_2_parameters.ini 
b/src/tests/topp/FeatureFinderMultiplex_2_parameters.ini index 73592781f34..6379695e193 100644 --- a/src/tests/topp/FeatureFinderMultiplex_2_parameters.ini +++ b/src/tests/topp/FeatureFinderMultiplex_2_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureFinderMultiplex_3_parameters.ini b/src/tests/topp/FeatureFinderMultiplex_3_parameters.ini index 742274c787f..d7becb2fa70 100644 --- a/src/tests/topp/FeatureFinderMultiplex_3_parameters.ini +++ b/src/tests/topp/FeatureFinderMultiplex_3_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureFinderMultiplex_4_parameters.ini b/src/tests/topp/FeatureFinderMultiplex_4_parameters.ini index e21bb4a9026..336469a6a9b 100644 --- a/src/tests/topp/FeatureFinderMultiplex_4_parameters.ini +++ b/src/tests/topp/FeatureFinderMultiplex_4_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureFinderMultiplex_5_parameters.ini b/src/tests/topp/FeatureFinderMultiplex_5_parameters.ini index 4e033d1c5f9..f53292d2c06 100644 --- a/src/tests/topp/FeatureFinderMultiplex_5_parameters.ini +++ b/src/tests/topp/FeatureFinderMultiplex_5_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureFinderMultiplex_6_parameters.ini b/src/tests/topp/FeatureFinderMultiplex_6_parameters.ini index ebf24260859..a940b935809 100644 --- a/src/tests/topp/FeatureFinderMultiplex_6_parameters.ini +++ b/src/tests/topp/FeatureFinderMultiplex_6_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureFinderMultiplex_7_parameters.ini b/src/tests/topp/FeatureFinderMultiplex_7_parameters.ini index 79d58d9cde2..b26f578ab85 100644 --- a/src/tests/topp/FeatureFinderMultiplex_7_parameters.ini +++ b/src/tests/topp/FeatureFinderMultiplex_7_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureFinderMultiplex_8_parameters.ini b/src/tests/topp/FeatureFinderMultiplex_8_parameters.ini index 46641f7528f..69fa82eeb16 100644 --- a/src/tests/topp/FeatureFinderMultiplex_8_parameters.ini +++ 
b/src/tests/topp/FeatureFinderMultiplex_8_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureFinderMultiplex_9_parameters.ini b/src/tests/topp/FeatureFinderMultiplex_9_parameters.ini index 5393780efc6..26731643d3c 100644 --- a/src/tests/topp/FeatureFinderMultiplex_9_parameters.ini +++ b/src/tests/topp/FeatureFinderMultiplex_9_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureLinkerLabeled_1_parameters.ini b/src/tests/topp/FeatureLinkerLabeled_1_parameters.ini index 2baf09e13c4..82fcf57533b 100644 --- a/src/tests/topp/FeatureLinkerLabeled_1_parameters.ini +++ b/src/tests/topp/FeatureLinkerLabeled_1_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureLinkerLabeled_2_parameters.ini b/src/tests/topp/FeatureLinkerLabeled_2_parameters.ini index 4c9ec3d6d94..2911df1836c 100644 --- a/src/tests/topp/FeatureLinkerLabeled_2_parameters.ini +++ b/src/tests/topp/FeatureLinkerLabeled_2_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureLinkerUnlabeledKD_1_parameters.ini b/src/tests/topp/FeatureLinkerUnlabeledKD_1_parameters.ini index c66d7102c44..42204dc4564 100644 --- a/src/tests/topp/FeatureLinkerUnlabeledKD_1_parameters.ini +++ b/src/tests/topp/FeatureLinkerUnlabeledKD_1_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureLinkerUnlabeledKD_2_parameters.ini b/src/tests/topp/FeatureLinkerUnlabeledKD_2_parameters.ini index 99a8eadbf58..71908839cd7 100644 --- a/src/tests/topp/FeatureLinkerUnlabeledKD_2_parameters.ini +++ b/src/tests/topp/FeatureLinkerUnlabeledKD_2_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureLinkerUnlabeledKD_3_parameters.ini b/src/tests/topp/FeatureLinkerUnlabeledKD_3_parameters.ini index f1f38b3f6b0..a7c16280781 100644 --- a/src/tests/topp/FeatureLinkerUnlabeledKD_3_parameters.ini +++ b/src/tests/topp/FeatureLinkerUnlabeledKD_3_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureLinkerUnlabeledKD_4_parameters.ini 
b/src/tests/topp/FeatureLinkerUnlabeledKD_4_parameters.ini index 9e430d8ab42..f747d3b8989 100644 --- a/src/tests/topp/FeatureLinkerUnlabeledKD_4_parameters.ini +++ b/src/tests/topp/FeatureLinkerUnlabeledKD_4_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureLinkerUnlabeledQT_1_parameters.ini b/src/tests/topp/FeatureLinkerUnlabeledQT_1_parameters.ini index 119ed72c130..39ab3aa8594 100644 --- a/src/tests/topp/FeatureLinkerUnlabeledQT_1_parameters.ini +++ b/src/tests/topp/FeatureLinkerUnlabeledQT_1_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureLinkerUnlabeledQT_2_parameters.ini b/src/tests/topp/FeatureLinkerUnlabeledQT_2_parameters.ini index 33d10cb51ab..7c92799f20f 100644 --- a/src/tests/topp/FeatureLinkerUnlabeledQT_2_parameters.ini +++ b/src/tests/topp/FeatureLinkerUnlabeledQT_2_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureLinkerUnlabeledQT_3_parameters.ini b/src/tests/topp/FeatureLinkerUnlabeledQT_3_parameters.ini index a8926c03c30..7183a4f4bf4 100644 --- a/src/tests/topp/FeatureLinkerUnlabeledQT_3_parameters.ini +++ b/src/tests/topp/FeatureLinkerUnlabeledQT_3_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureLinkerUnlabeled_1_parameters.ini b/src/tests/topp/FeatureLinkerUnlabeled_1_parameters.ini index c4e24a76325..ed59461bcd9 100644 --- a/src/tests/topp/FeatureLinkerUnlabeled_1_parameters.ini +++ b/src/tests/topp/FeatureLinkerUnlabeled_1_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureLinkerUnlabeled_2_parameters.ini b/src/tests/topp/FeatureLinkerUnlabeled_2_parameters.ini index cb736d53e4b..c48d491a101 100644 --- a/src/tests/topp/FeatureLinkerUnlabeled_2_parameters.ini +++ b/src/tests/topp/FeatureLinkerUnlabeled_2_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureLinkerUnlabeled_3_parameters.ini b/src/tests/topp/FeatureLinkerUnlabeled_3_parameters.ini index d781ea4c205..897d3af09ea 100644 --- 
a/src/tests/topp/FeatureLinkerUnlabeled_3_parameters.ini +++ b/src/tests/topp/FeatureLinkerUnlabeled_3_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FeatureLinkerUnlabeled_4_parameters.ini b/src/tests/topp/FeatureLinkerUnlabeled_4_parameters.ini index ebebb7aa95e..83e837e6ec2 100644 --- a/src/tests/topp/FeatureLinkerUnlabeled_4_parameters.ini +++ b/src/tests/topp/FeatureLinkerUnlabeled_4_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/FuzzyDiff.ini b/src/tests/topp/FuzzyDiff.ini index 34ac7c0f35f..5e5fb523a2b 100644 --- a/src/tests/topp/FuzzyDiff.ini +++ b/src/tests/topp/FuzzyDiff.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/GNPSExport_1_mostint.ini b/src/tests/topp/GNPSExport_1_mostint.ini index 0d94962d17e..3789f6a6ea1 100644 --- a/src/tests/topp/GNPSExport_1_mostint.ini +++ b/src/tests/topp/GNPSExport_1_mostint.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/GNPSExport_2_merged.ini b/src/tests/topp/GNPSExport_2_merged.ini index a56ee5e5d0a..161d0ef4711 100644 --- a/src/tests/topp/GNPSExport_2_merged.ini +++ b/src/tests/topp/GNPSExport_2_merged.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/GNPSExport_3_binsize.ini b/src/tests/topp/GNPSExport_3_binsize.ini index 897da74b661..1cc5a2a4df9 100644 --- a/src/tests/topp/GNPSExport_3_binsize.ini +++ b/src/tests/topp/GNPSExport_3_binsize.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/IDFileConverter_5_parameters.ini b/src/tests/topp/IDFileConverter_5_parameters.ini index 5e79bb0cf03..051fcf1a40c 100644 --- a/src/tests/topp/IDFileConverter_5_parameters.ini +++ b/src/tests/topp/IDFileConverter_5_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/INIUpdater_1_noupdate.toppas b/src/tests/topp/INIUpdater_1_noupdate.toppas index 49627d2c8e5..953e1c10764 100644 --- a/src/tests/topp/INIUpdater_1_noupdate.toppas +++ b/src/tests/topp/INIUpdater_1_noupdate.toppas @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/IsobaricAnalyzer.ini b/src/tests/topp/IsobaricAnalyzer.ini 
index b02ffdfd22f..701f6df0149 100644 --- a/src/tests/topp/IsobaricAnalyzer.ini +++ b/src/tests/topp/IsobaricAnalyzer.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/MapAlignerIdentification_parameters.ini b/src/tests/topp/MapAlignerIdentification_parameters.ini index f8f04656cfa..e6e857e721d 100644 --- a/src/tests/topp/MapAlignerIdentification_parameters.ini +++ b/src/tests/topp/MapAlignerIdentification_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/MapAlignerPoseClustering_1_parameters.ini b/src/tests/topp/MapAlignerPoseClustering_1_parameters.ini index 73e454d769b..e731a16a31a 100644 --- a/src/tests/topp/MapAlignerPoseClustering_1_parameters.ini +++ b/src/tests/topp/MapAlignerPoseClustering_1_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/MapAlignerPoseClustering_2_parameters.ini b/src/tests/topp/MapAlignerPoseClustering_2_parameters.ini index a9494615705..f50b955bb09 100644 --- a/src/tests/topp/MapAlignerPoseClustering_2_parameters.ini +++ b/src/tests/topp/MapAlignerPoseClustering_2_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/MapAlignerSpectrum_parameters.ini b/src/tests/topp/MapAlignerSpectrum_parameters.ini index c3071d7b320..d36217dcdfc 100644 --- a/src/tests/topp/MapAlignerSpectrum_parameters.ini +++ b/src/tests/topp/MapAlignerSpectrum_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/MapAlignerTreeGuided_parameters.ini b/src/tests/topp/MapAlignerTreeGuided_parameters.ini index 0d9a316155e..f53bcdd990f 100644 --- a/src/tests/topp/MapAlignerTreeGuided_parameters.ini +++ b/src/tests/topp/MapAlignerTreeGuided_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/MapAlignerTreeGuided_parameters2.ini b/src/tests/topp/MapAlignerTreeGuided_parameters2.ini index 4823cb75954..84eda643e63 100644 --- a/src/tests/topp/MapAlignerTreeGuided_parameters2.ini +++ b/src/tests/topp/MapAlignerTreeGuided_parameters2.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/MassTraceExtractor.ini 
b/src/tests/topp/MassTraceExtractor.ini index ee7fdedc99e..926fa6e16fb 100644 --- a/src/tests/topp/MassTraceExtractor.ini +++ b/src/tests/topp/MassTraceExtractor.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/MassTraceExtractor_2.ini b/src/tests/topp/MassTraceExtractor_2.ini index b187a25ce79..c3bcbeb8555 100644 --- a/src/tests/topp/MassTraceExtractor_2.ini +++ b/src/tests/topp/MassTraceExtractor_2.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/MassTraceExtractor_3_expected.ini b/src/tests/topp/MassTraceExtractor_3_expected.ini index 9068394ec4b..5b565bfde80 100644 --- a/src/tests/topp/MassTraceExtractor_3_expected.ini +++ b/src/tests/topp/MassTraceExtractor_3_expected.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/MultiplexResolver_1_parameters.ini b/src/tests/topp/MultiplexResolver_1_parameters.ini index 6b6189d6a95..c28f6fa91ee 100644 --- a/src/tests/topp/MultiplexResolver_1_parameters.ini +++ b/src/tests/topp/MultiplexResolver_1_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/MultiplexResolver_2_parameters.ini b/src/tests/topp/MultiplexResolver_2_parameters.ini index 3b2a530fafe..8928343dcd4 100644 --- a/src/tests/topp/MultiplexResolver_2_parameters.ini +++ b/src/tests/topp/MultiplexResolver_2_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/MultiplexResolver_3_parameters.ini b/src/tests/topp/MultiplexResolver_3_parameters.ini index c2d1a76baf0..0877ff12c33 100644 --- a/src/tests/topp/MultiplexResolver_3_parameters.ini +++ b/src/tests/topp/MultiplexResolver_3_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/MultiplexResolver_4_parameters.ini b/src/tests/topp/MultiplexResolver_4_parameters.ini index fecac9a38a4..7636f9ba079 100644 --- a/src/tests/topp/MultiplexResolver_4_parameters.ini +++ b/src/tests/topp/MultiplexResolver_4_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/NoiseFilterGaussian_2_parameters.ini b/src/tests/topp/NoiseFilterGaussian_2_parameters.ini index 
4ccd59f245f..88f7d90f44d 100644 --- a/src/tests/topp/NoiseFilterGaussian_2_parameters.ini +++ b/src/tests/topp/NoiseFilterGaussian_2_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/NoiseFilterSGolay_2_parameters.ini b/src/tests/topp/NoiseFilterSGolay_2_parameters.ini index a76de0f2ef8..85ed4cdae13 100644 --- a/src/tests/topp/NoiseFilterSGolay_2_parameters.ini +++ b/src/tests/topp/NoiseFilterSGolay_2_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/NucleicAcidSearchEngine_1.ini b/src/tests/topp/NucleicAcidSearchEngine_1.ini index f3f9a903b07..85a138b216d 100644 --- a/src/tests/topp/NucleicAcidSearchEngine_1.ini +++ b/src/tests/topp/NucleicAcidSearchEngine_1.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/OpenPepXLLF_input2.ini b/src/tests/topp/OpenPepXLLF_input2.ini index 26a1dc4530e..39a1d71db52 100644 --- a/src/tests/topp/OpenPepXLLF_input2.ini +++ b/src/tests/topp/OpenPepXLLF_input2.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/OpenPepXL_input.ini b/src/tests/topp/OpenPepXL_input.ini index 24fb504c6b3..e0181ef3ad4 100644 --- a/src/tests/topp/OpenPepXL_input.ini +++ b/src/tests/topp/OpenPepXL_input.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/OpenSwathAnalyzer_5.ini b/src/tests/topp/OpenSwathAnalyzer_5.ini index 967c537cec9..218a16b7557 100644 --- a/src/tests/topp/OpenSwathAnalyzer_5.ini +++ b/src/tests/topp/OpenSwathAnalyzer_5.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/OpenSwathAnalyzer_7_backgroundSubtraction.ini b/src/tests/topp/OpenSwathAnalyzer_7_backgroundSubtraction.ini index a9b85b1c512..2e60f26c307 100644 --- a/src/tests/topp/OpenSwathAnalyzer_7_backgroundSubtraction.ini +++ b/src/tests/topp/OpenSwathAnalyzer_7_backgroundSubtraction.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/PeakPickerHiRes_parameters.ini b/src/tests/topp/PeakPickerHiRes_parameters.ini index 5a5e6fcaabf..518c183ee27 100644 --- a/src/tests/topp/PeakPickerHiRes_parameters.ini +++ b/src/tests/topp/PeakPickerHiRes_parameters.ini 
@@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/PeakPickerIterative_1.ini b/src/tests/topp/PeakPickerIterative_1.ini index 3ef42fbeb15..0c4538ae19a 100644 --- a/src/tests/topp/PeakPickerIterative_1.ini +++ b/src/tests/topp/PeakPickerIterative_1.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/PeakPickerIterative_2.ini b/src/tests/topp/PeakPickerIterative_2.ini index 7a2a0994fdb..7e05dbae088 100755 --- a/src/tests/topp/PeakPickerIterative_2.ini +++ b/src/tests/topp/PeakPickerIterative_2.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/SimpleSearchEngine_1.ini b/src/tests/topp/SimpleSearchEngine_1.ini index 1ac4622d3f2..c1e5f4e126f 100644 --- a/src/tests/topp/SimpleSearchEngine_1.ini +++ b/src/tests/topp/SimpleSearchEngine_1.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/SimpleSearchEngine_2.ini b/src/tests/topp/SimpleSearchEngine_2.ini index 4ae1affc358..5c6aec59162 100644 --- a/src/tests/topp/SimpleSearchEngine_2.ini +++ b/src/tests/topp/SimpleSearchEngine_2.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/SpecLibSearcher_1_parameters.ini b/src/tests/topp/SpecLibSearcher_1_parameters.ini index e1cb4d9ef75..bdcad9107f9 100644 --- a/src/tests/topp/SpecLibSearcher_1_parameters.ini +++ b/src/tests/topp/SpecLibSearcher_1_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/THIRDPARTY/CometAdapter_1.ini b/src/tests/topp/THIRDPARTY/CometAdapter_1.ini index 8d1d9a460f6..7fdc8e87bb6 100644 --- a/src/tests/topp/THIRDPARTY/CometAdapter_1.ini +++ b/src/tests/topp/THIRDPARTY/CometAdapter_1.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/THIRDPARTY/CometAdapter_3.ini b/src/tests/topp/THIRDPARTY/CometAdapter_3.ini index 5a52a9ae791..00d617feba8 100644 --- a/src/tests/topp/THIRDPARTY/CometAdapter_3.ini +++ b/src/tests/topp/THIRDPARTY/CometAdapter_3.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/THIRDPARTY/MSGFPlusAdapter_1.ini b/src/tests/topp/THIRDPARTY/MSGFPlusAdapter_1.ini index 640e40277d0..ffbe8be8f50 100644 --- 
a/src/tests/topp/THIRDPARTY/MSGFPlusAdapter_1.ini +++ b/src/tests/topp/THIRDPARTY/MSGFPlusAdapter_1.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/THIRDPARTY/MaRaClusterAdapter_1.ini b/src/tests/topp/THIRDPARTY/MaRaClusterAdapter_1.ini index c0ad12da28c..bef66ae26d5 100644 --- a/src/tests/topp/THIRDPARTY/MaRaClusterAdapter_1.ini +++ b/src/tests/topp/THIRDPARTY/MaRaClusterAdapter_1.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/THIRDPARTY/MaRaClusterAdapter_2.ini b/src/tests/topp/THIRDPARTY/MaRaClusterAdapter_2.ini index c0ad12da28c..bef66ae26d5 100644 --- a/src/tests/topp/THIRDPARTY/MaRaClusterAdapter_2.ini +++ b/src/tests/topp/THIRDPARTY/MaRaClusterAdapter_2.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/THIRDPARTY/MascotAdapterOnline_1.ini b/src/tests/topp/THIRDPARTY/MascotAdapterOnline_1.ini index 9b5fb049137..76547316724 100644 --- a/src/tests/topp/THIRDPARTY/MascotAdapterOnline_1.ini +++ b/src/tests/topp/THIRDPARTY/MascotAdapterOnline_1.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/THIRDPARTY/PercolatorAdapter_1.ini b/src/tests/topp/THIRDPARTY/PercolatorAdapter_1.ini index d2551907a00..7f147109d00 100644 --- a/src/tests/topp/THIRDPARTY/PercolatorAdapter_1.ini +++ b/src/tests/topp/THIRDPARTY/PercolatorAdapter_1.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/THIRDPARTY/SageAdapter_1.ini b/src/tests/topp/THIRDPARTY/SageAdapter_1.ini index 4da646de26e..cb576f38aea 100644 --- a/src/tests/topp/THIRDPARTY/SageAdapter_1.ini +++ b/src/tests/topp/THIRDPARTY/SageAdapter_1.ini @@ -1,14 +1,14 @@ - + - + @@ -19,7 +19,7 @@ - + @@ -31,13 +31,28 @@ + + + + + + + + + + + + + + + + - - + @@ -50,6 +65,10 @@ + + + + diff --git a/src/tests/topp/THIRDPARTY/SageAdapter_1_out.idXML b/src/tests/topp/THIRDPARTY/SageAdapter_1_out.idXML index 999dc827a09..10ed2e21077 100644 --- a/src/tests/topp/THIRDPARTY/SageAdapter_1_out.idXML +++ b/src/tests/topp/THIRDPARTY/SageAdapter_1_out.idXML @@ -1,14 +1,14 @@ - + - + - - + + @@ -36,10 +36,17 @@ + + + + + + + + - @@ 
-58,29 +65,33 @@ - + - - + + + - + + + - - + + - - + + + - + diff --git a/src/tests/topp/THIRDPARTY/XTandemAdapter_1.ini b/src/tests/topp/THIRDPARTY/XTandemAdapter_1.ini index 8ca665d5a71..138be1c17ac 100644 --- a/src/tests/topp/THIRDPARTY/XTandemAdapter_1.ini +++ b/src/tests/topp/THIRDPARTY/XTandemAdapter_1.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/THIRDPARTY/matched_fragments.sage.tsv b/src/tests/topp/THIRDPARTY/matched_fragments.sage.tsv new file mode 100644 index 00000000000..06234e0c23b --- /dev/null +++ b/src/tests/topp/THIRDPARTY/matched_fragments.sage.tsv @@ -0,0 +1,29 @@ +psm_id fragment_type fragment_ordinals fragment_charge fragment_mz_calculated fragment_mz_experimental fragment_intensity +1 b 2 1 242.14992 242.14989 540839.4 +1 b 4 1 485.28305 485.28275 552907.0 +1 b 5 1 582.3358 582.33417 249718.42 +1 b 6 1 653.3729 653.3723 448620.62 +1 b 7 1 724.41003 724.4097 7813033.5 +1 b 7 2 362.70865 362.70834 240484.1 +1 b 8 1 821.46277 821.4622 348964.8 +1 b 8 2 411.23502 411.235 1077902.5 +1 b 9 2 459.76138 459.76132 257510.27 +1 b 10 1 989.5526 989.55237 7521718.0 +1 b 10 2 495.27994 495.27847 315987.97 +1 b 12 1 1143.627 1143.6226 443099.75 +1 y 17 2 843.4759 843.4913 637089.7 +1 y 12 2 602.3459 602.3467 691258.44 +1 y 11 1 1106.6318 1106.6305 1731329.5 +1 y 10 1 1009.57904 1009.57855 1041480.6 +1 y 9 1 938.54193 938.5417 9045039.0 +1 y 9 2 469.7746 469.77466 2463108.8 +1 y 8 1 841.4892 841.4886 4294338.0 +1 y 8 2 421.24823 421.24786 259093.06 +1 y 7 1 784.4677 784.46735 3691260.2 +1 y 7 2 392.7375 392.73746 983722.6 +1 y 6 1 687.415 687.4143 1646364.6 +1 y 5 1 630.3935 630.3935 603377.6 +1 y 4 1 502.3349 502.33484 1054908.6 +1 y 3 1 389.25085 389.251 712993.75 +1 y 2 1 288.2032 288.20273 213723.94 +1 y 1 1 175.11914 175.11903 675764.75 diff --git a/src/tests/topp/THIRDPARTY/third_party_tests.cmake b/src/tests/topp/THIRDPARTY/third_party_tests.cmake index 84d38d08711..a88ebe49ebc 100644 --- a/src/tests/topp/THIRDPARTY/third_party_tests.cmake +++ 
b/src/tests/topp/THIRDPARTY/third_party_tests.cmake @@ -127,7 +127,7 @@ endif() if (NOT (${SAGE_BINARY} STREQUAL "SAGE_BINARY-NOTFOUND")) ### NOT needs to be added after the binarys have been included add_test("TOPP_SageAdapter_1" ${TOPP_BIN_PATH}/SageAdapter -test -ini ${DATA_DIR_TOPP}/THIRDPARTY/SageAdapter_1.ini -database ${DATA_DIR_TOPP}/THIRDPARTY/SageAdapter_1.fasta -in ${DATA_DIR_TOPP}/THIRDPARTY/SageAdapter_1.mzML -out SageAdapter_1_out.tmp.idXML -sage_executable "${SAGE_BINARY}") - add_test("TOPP_SageAdapter_1_out1" ${DIFF} -in1 SageAdapter_1_out.tmp.idXML -in2 ${DATA_DIR_TOPP}/THIRDPARTY/SageAdapter_1_out.idXML -whitelist "search_engine_version" "IdentificationRun date" "spectra_data" "SearchParameters id=\"SP_0\" db=" "UserParam type=\"stringList\" name=\"SageAdapter:1:in\" value=" "UserParam type=\"string\" name=\"SageAdapter:1:database\" value=" "UserParam type=\"string\" name=\"SageAdapter:1:sage_executable\" value=") + add_test("TOPP_SageAdapter_1_out1" ${DIFF} -in1 SageAdapter_1_out.tmp.idXML -in2 ${DATA_DIR_TOPP}/THIRDPARTY/SageAdapter_1_out.idXML -whitelist "search_engine_version" "IdentificationRun date" "spectra_data" "SearchParameters id="SP_0" db=" "UserParam type="stringList" name="SageAdapter:1:in" value=" "UserParam type="string" name="SageAdapter:1:database" value=" "UserParam type="string" name="SageAdapter:1:sage_executable" value=" "fragment_annotation") set_tests_properties("TOPP_SageAdapter_1_out1" PROPERTIES DEPENDS "TOPP_SageAdapter_1") endif() diff --git a/src/tests/topp/TMTElevenPlexMethod_test.ini b/src/tests/topp/TMTElevenPlexMethod_test.ini index 6188c3185fe..4c3273289f0 100644 --- a/src/tests/topp/TMTElevenPlexMethod_test.ini +++ b/src/tests/topp/TMTElevenPlexMethod_test.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/TMTTenPlexMethod_test.ini b/src/tests/topp/TMTTenPlexMethod_test.ini index 82cc41aae69..dc73c56d57d 100644 --- a/src/tests/topp/TMTTenPlexMethod_test.ini +++ b/src/tests/topp/TMTTenPlexMethod_test.ini @@ -1,7 
+1,7 @@ - + diff --git a/src/tests/topp/TextExporter_2_parameters.ini b/src/tests/topp/TextExporter_2_parameters.ini index a2a2a90cd4b..1efaff2e83e 100644 --- a/src/tests/topp/TextExporter_2_parameters.ini +++ b/src/tests/topp/TextExporter_2_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/TextExporter_7_parameters.ini b/src/tests/topp/TextExporter_7_parameters.ini index da68ac5cfd9..c73fd8c702e 100644 --- a/src/tests/topp/TextExporter_7_parameters.ini +++ b/src/tests/topp/TextExporter_7_parameters.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/WRITE_INI_IN.ini b/src/tests/topp/WRITE_INI_IN.ini index a851e5b7ce8..9517636fc7d 100644 --- a/src/tests/topp/WRITE_INI_IN.ini +++ b/src/tests/topp/WRITE_INI_IN.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/WRITE_INI_OUT.ini b/src/tests/topp/WRITE_INI_OUT.ini index 9e7ff4e8df0..7b59fa38b34 100644 --- a/src/tests/topp/WRITE_INI_OUT.ini +++ b/src/tests/topp/WRITE_INI_OUT.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/degenerate_cases/PPHiRes_invalidParamName.ini b/src/tests/topp/degenerate_cases/PPHiRes_invalidParamName.ini index c634b512a34..efbc64958c0 100644 --- a/src/tests/topp/degenerate_cases/PPHiRes_invalidParamName.ini +++ b/src/tests/topp/degenerate_cases/PPHiRes_invalidParamName.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/degenerate_cases/PPHiRes_invalidValue.ini b/src/tests/topp/degenerate_cases/PPHiRes_invalidValue.ini index 7ec77192496..f4c219b0800 100644 --- a/src/tests/topp/degenerate_cases/PPHiRes_invalidValue.ini +++ b/src/tests/topp/degenerate_cases/PPHiRes_invalidValue.ini @@ -1,7 +1,7 @@ - + diff --git a/src/tests/topp/degenerate_cases/PPHiRes_invalidValueSection.ini b/src/tests/topp/degenerate_cases/PPHiRes_invalidValueSection.ini index 02676d0446d..6a6621d2683 100644 --- a/src/tests/topp/degenerate_cases/PPHiRes_invalidValueSection.ini +++ b/src/tests/topp/degenerate_cases/PPHiRes_invalidValueSection.ini @@ -1,7 +1,7 @@ - + diff --git a/src/topp/CometAdapter.cpp 
b/src/topp/CometAdapter.cpp index c400f018f53..bef7e83e190 100644 --- a/src/topp/CometAdapter.cpp +++ b/src/topp/CometAdapter.cpp @@ -294,8 +294,8 @@ class TOPPCometAdapter : { OPENMS_LOG_WARN << "Comet v2024.01.0 is known to have several bugs (see https://github.com/UWPR/Comet/issues/63). Please use a different version if possible." << std::endl; } - // Comet v2024.01.0 introduces “peptide_mass_tolerance_lower” and “peptide_mass_tolerance_upper” parameters - // and deprecates “peptide_mass_tolerance” (which is buggy in this version, see https://github.com/UWPR/Comet/issues/59) + // Comet v2024.01.0 introduces "peptide_mass_tolerance_lower" and "peptide_mass_tolerance_upper" parameters + // and deprecates "peptide_mass_tolerance" (which is buggy in this version, see https://github.com/UWPR/Comet/issues/59) // We need to use the new parameters from this version onwards double precursor_mass_tolerance = getDoubleOption_("precursor_mass_tolerance"); if (comet_year >= 2024) @@ -609,6 +609,8 @@ class TOPPCometAdapter : os << "10. Chymotrypsin 1 FWYL P" << "\n"; os << "11. No_cut 1 @ @" << "\n"; os << "12. Arg-C/P 1. R _" << "\n"; + os << "13. Lys-C/P 1 K -" << "\n"; + os << "14. 
Leukocyte_elastase 1 ALIV -" << "\n"; return ExitCodes::EXECUTION_OK; } diff --git a/src/topp/DecoyDatabase.cpp b/src/topp/DecoyDatabase.cpp index c67f7caf6d0..e78b8c7126e 100644 --- a/src/topp/DecoyDatabase.cpp +++ b/src/topp/DecoyDatabase.cpp @@ -3,20 +3,23 @@ // // -------------------------------------------------------------------------- // $Maintainer: Sven Nahnsen $ -// $Authors: Sven Nahnsen, Andreas Bertsch, Chris Bielow $ +// $Authors: Sven Nahnsen, Andreas Bertsch, Chris Bielow, Philipp Wang $ // -------------------------------------------------------------------------- -#include -#include -#include -#include +#include #include -#include #include -#include +#include +#include +#include #include +#include +#include +#include + #include + using namespace OpenMS; using namespace std; @@ -50,10 +53,17 @@ The tool will keep track of all protein identifiers and report duplicates. Also the tool automatically checks for decoys already in the input files (based on most common pre-/suffixes) and terminates the program if decoys are found. +Extra functionality: +The Neighbor Peptide functionality (see subsection 'NeighborSearch') is designed to find peptides (neighbors) in a given set of sequences (FASTA file) that are +similar to a target peptide (aka relevant peptide) based on mass and spectral characteristics. This provides more power +when searching complex samples in which only a subset of the peptides/proteins is of interest. +See www.ncbi.nlm.nih.gov/pmc/articles/PMC8489664/ and NeighborSeq for details. 
+ The command line parameters of this tool are: @verbinclude TOPP_DecoyDatabase.cli INI file documentation of this tool: @htmlinclude TOPP_DecoyDatabase.html + */ // We do not want this class to show up in the docu: @@ -64,7 +74,7 @@ class TOPPDecoyDatabase : { public: TOPPDecoyDatabase() : - TOPPBase("DecoyDatabase", "Create decoy sequence database from forward sequence database.") + TOPPBase("DecoyDatabase", "Creates combined target+decoy sequence database from forward sequence database.") { } @@ -72,23 +82,23 @@ class TOPPDecoyDatabase : void registerOptionsAndFlags_() override { registerInputFileList_("in", "", ListUtils::create(""), "Input FASTA file(s), each containing a database. It is recommended to include a contaminant database as well."); - setValidFormats_("in", ListUtils::create("fasta")); - registerOutputFile_("out", "", "", "Output FASTA file where the decoy database will be written to."); - setValidFormats_("out", ListUtils::create("fasta")); + setValidFormats_("in", {"fasta"}); + registerOutputFile_("out", "", "", "Output FASTA file where the decoy database (target + decoy or only decoy, see 'only_decoy') will be written to."); + setValidFormats_("out", {"fasta"}); registerStringOption_("decoy_string", "", "DECOY_", "String that is combined with the accession of the protein identifier to indicate a decoy protein.", false); registerStringOption_("decoy_string_position", "", "prefix", "Should the 'decoy_string' be prepended (prefix) or appended (suffix) to the protein accession?", false); - setValidStrings_("decoy_string_position", ListUtils::create("prefix,suffix")); + setValidStrings_("decoy_string_position", {"prefix", "suffix"}); registerFlag_("only_decoy", "Write only decoy proteins to the output database instead of a combined database.", false); registerStringOption_("type", "", "protein", "Type of sequence. 
RNA sequences may contain modification codes, which will be handled correctly if this is set to 'RNA'.", false); - setValidStrings_("type", ListUtils::create("protein,RNA")); + setValidStrings_("type", {"protein", "RNA"}); registerStringOption_("method", "", "reverse", "Method by which decoy sequences are generated from target sequences. Note that all sequences are shuffled using the same random seed, ensuring that identical sequences produce the same shuffled decoy sequences. Shuffled sequences that produce highly similar output sequences are shuffled again (see shuffle_sequence_identity_threshold).", false); - setValidStrings_("method", ListUtils::create("reverse,shuffle")); + setValidStrings_("method", {"reverse", "shuffle"}); registerIntOption_("shuffle_max_attempts", "", 30, "shuffle: maximum attempts to lower the amino acid sequence identity between target and decoy for the shuffle algorithm", false, true); registerDoubleOption_("shuffle_sequence_identity_threshold", "", 0.5, "shuffle: target-decoy amino acid sequence identity threshold for the shuffle algorithm. If the sequence identity is above this threshold, shuffling is repeated. 
In case of repeated failure, individual amino acids are 'mutated' to produce a different amino acid sequence.", false, true); - registerStringOption_("seed", "", '1', "Random number seed (use 'time' for system time)", false, true); + registerStringOption_("seed", "", '1', "Random number seed (use 'time' for system time)", false, true); StringList all_enzymes; ProteaseDB::getInstance()->getAllNames(all_enzymes); @@ -96,8 +106,25 @@ class TOPPDecoyDatabase : setValidStrings_("enzyme", all_enzymes); registerSubsection_("Decoy", "Decoy parameters section"); + + // New options for neighbor peptide search + registerTOPPSubsection_("NeighborSearch", "Parameters for neighbor peptide search ('in' holds the neighbor candidates)"); + registerInputFile_("NeighborSearch:in_relevant_proteins", "","", "These are the relevant proteins, for which we seek neighbors", false); + setValidFormats_("NeighborSearch:in_relevant_proteins", {"fasta"}); + registerOutputFile_("NeighborSearch:out_neighbor", "", "", "Output FASTA file with neighbors of relevant peptides (given in 'in_relevant_proteins').",false); + registerOutputFile_("NeighborSearch:out_relevant", "", "", + "Output FASTA file with target+decoy of relevant peptides (given in 'in_relevant_proteins'). 
Required for downstream filtering of search results via IDFilter and subsequent FDR.", false); + registerIntOption_("NeighborSearch:missed_cleavages", "", 0, "Number of missed cleavages for relevant and neighbor peptides.", false); + registerDoubleOption_("NeighborSearch:mz_bin_size", "", 0.05,"Bin size for spectra m/z comparison (the original study suggests 0.05 Th for high-res and 1.0005079 Th for low-res spectra).", false); + registerDoubleOption_("NeighborSearch:pc_mass_tolerance", "", 0.01, "Maximal precursor mass difference (in Da or ppm; see 'pc_mass_tolerance_unit') between neighbor and relevant peptide.", false); + registerStringOption_("NeighborSearch:pc_mass_tolerance_unit", "", "Da", "Is 'pc_mass_tolerance' in Da or ppm?", false); + setValidStrings_("NeighborSearch:pc_mass_tolerance_unit", {"Da", "ppm"}); + registerIntOption_("NeighborSearch:min_peptide_length", "", 5, "Minimum peptide length (relevant and neighbor peptides)", false); + registerDoubleOption_("NeighborSearch:min_shared_ion_fraction", "", 0.25, + "Minimal required overlap 't_i' of b/y ions shared between neighbor candidate and a relevant peptide (t_i <= 2*B12/(B1+B2)). 
Higher values result in fewer neighbors.", false); } + Param getSubsectionDefaults_(const String& /* name */) const override { Param p = MRMDecoy().getDefaults(); @@ -106,31 +133,56 @@ class TOPPDecoyDatabase : return p; } - String getIdentifier_(const String& identifier, const String& decoy_string, const bool as_prefix) + String getDecoyIdentifier_(const String& identifier, const String& decoy_string, const bool as_prefix) { if (as_prefix) return decoy_string + identifier; else return identifier + decoy_string; } + ExitCodes main_(int, const char**) override { //------------------------------------------------------------- // parsing parameters //------------------------------------------------------------- - enum SeqType {protein, RNA}; + enum class SeqType {protein, RNA}; StringList in = getStringList_("in"); String out = getStringOption_("out"); + bool append = !getFlag_("only_decoy"); bool shuffle = (getStringOption_("method") == "shuffle"); String decoy_string = getStringOption_("decoy_string"); bool decoy_string_position_prefix = (getStringOption_("decoy_string_position") == "prefix"); - SeqType input_type = SeqType::protein; //default to protein - if (getStringOption_("type") == "RNA") + + // check if decoy_string is common decoy string (e.g. decoy, rev, ...) + String decoy_string_lower = decoy_string; + decoy_string_lower.toLower(); + bool is_common = false; + for (const auto& a : DecoyHelper::affixes) { - input_type = SeqType::RNA; + if ((decoy_string_lower.hasPrefix(a) && decoy_string_position_prefix) || (decoy_string_lower.hasSuffix(a) && ! decoy_string_position_prefix)) + { + is_common = true; + } + } + // terminate, if decoy_string is not one of the allowed decoy strings (exit code 11) + if (! is_common) + { + if (getFlag_("force")) + { + OPENMS_LOG_WARN << "Force Flag is enabled, decoys with custom decoy string (not in DecoyHelper::affixes) will not be detected.\n"; + } + else + { + OPENMS_LOG_FATAL_ERROR << "Given decoy string is not allowed. 
Please use one of the strings in DecoyHelper::affixes as either prefix or " + "suffix (case insensitive): \n"; + return INCOMPATIBLE_INPUT_DATA; + } } + const SeqType input_type = getStringOption_("type") == "RNA" ? SeqType::RNA : SeqType::protein; + Param decoy_param = getParam_().copy("Decoy:", true); bool keepN = decoy_param.getValue("keepPeptideNTerm").toBool(); bool keepC = decoy_param.getValue("keepPeptideCTerm").toBool(); @@ -144,10 +196,15 @@ class TOPPDecoyDatabase : // every time (without keeping track of them explicitly). This // will ensure that the total number of unique tryptic peptides // is identical in both databases. - int seed; + ; String seed_option(getStringOption_("seed")); - if (seed_option == "time") seed = time(nullptr); - else seed = seed_option.toInt(); + const int seed = (seed_option == "time") ? time(nullptr) : seed_option.toInt(); + + // Configure Enzymatic digestion + // TODO: allow user-specified regex + ProteaseDigestion digestion; + String enzyme = getStringOption_("enzyme").trim(); + if ((input_type == SeqType::protein) && ! enzyme.empty()) { digestion.setEnzyme(enzyme); } //------------------------------------------------------------- // reading input @@ -156,65 +213,155 @@ class TOPPDecoyDatabase : if (in.size() == 1) { OPENMS_LOG_WARN << "Warning: Only one FASTA input file was provided, which might not contain contaminants. " - << "You probably want to have them! Just add the contaminant file to the input file list 'in'." << endl; + << "You probably want to have them! Just add the contaminant file to the input file list 'in'." 
<< endl; } - set identifiers; // spot duplicate identifiers // std::unordered_set has slightly more RAM, but slightly less CPU + // do this first, before potentially entering neighbor mode (which modifies the 'in' list) + for (const auto& file_fasta : in) + { + // check input files for decoys + FASTAContainer in_entries {file_fasta}; + auto r = DecoyHelper::countDecoys(in_entries); + // if decoys found, terminates with exit code INCOMPATIBLE_INPUT_DATA + if (static_cast(r.all_prefix_occur + r.all_suffix_occur) >= 0.4 * static_cast(r.all_proteins_count)) + { + OPENMS_LOG_FATAL_ERROR << "Invalid input in " + file_fasta + ": Input file already contains decoys." << '\n'; + return INCOMPATIBLE_INPUT_DATA; + } + } - FASTAFile f; - f.writeStart(out); - FASTAFile::FASTAEntry entry; - // Configure Enzymatic digestion - // TODO: allow user-specified regex - ProteaseDigestion digestion; - String enzyme = getStringOption_("enzyme").trim(); - if ((input_type == SeqType::protein) && !enzyme.empty()) + // create neighbor peptides for the relevant peptides? + String in_relevant_proteins = getStringOption_("NeighborSearch:in_relevant_proteins"); + String out_relevant = getStringOption_("NeighborSearch:out_relevant"); + String out_neighbor = getStringOption_("NeighborSearch:out_neighbor"); + if (in_relevant_proteins.empty() ^ out_relevant.empty()) { - digestion.setEnzyme(enzyme); + OPENMS_LOG_ERROR << "Parameter settings are invalid. Both 'in_relevant_proteins' and 'out_relevant' must be set or unset.\n"; + return ILLEGAL_PARAMETERS; } - // check if decoy_string is common decoy string (e.g. decoy, rev, ...) - String decoy_string_lower = decoy_string; - decoy_string_lower.toLower(); - bool is_common = false; - for (const auto& a : DecoyHelper::affixes) + const bool neighbor_mode = ! 
in_relevant_proteins.empty(); + if (!neighbor_mode && !out_neighbor.empty()) { - if ((decoy_string_lower.hasPrefix(a) && decoy_string_position_prefix) || (decoy_string_lower.hasSuffix(a) && !decoy_string_position_prefix)) - { - is_common = true; - } + OPENMS_LOG_ERROR << "Parameter settings are invalid. You requested neighbor peptides via 'NeighborSearch:out_neighbor', but failed specify the required input ('NeighborSearch:in_relevant_proteins').\n"; + return ILLEGAL_PARAMETERS; } - // terminate, if decoy_string is not one of the allowed decoy strings (exit code 11) - if (!is_common) + if (neighbor_mode) { - if (getFlag_("force")) + if (input_type != SeqType::protein) { - OPENMS_LOG_WARN << "Force Flag is enabled, decoys with custom decoy string (not in DecoyHelper::affixes) will not be detected.\n"; + OPENMS_LOG_ERROR << "Parameter settings are invalid. When requesting neighbor peptides, the input type must be 'protein', not 'RNA'.\n"; + return INCOMPATIBLE_INPUT_DATA; } - else + + if (out_neighbor.empty()) + { // make it a temp file, since we need to append its content to the final 'out' DB + out_neighbor = File::getTemporaryFile(out_neighbor); + } + + //------------------------------------------------------------- + // parsing neighbor parameters + //------------------------------------------------------------- + + FASTAFile fasta_neighbor_out; + fasta_neighbor_out.writeStart(out_neighbor); + + double mz_bin_size = getDoubleOption_("NeighborSearch:mz_bin_size"); + double min_shared_ion_fraction = getDoubleOption_("NeighborSearch:min_shared_ion_fraction"); + double mass_tolerance = getDoubleOption_("NeighborSearch:pc_mass_tolerance"); + bool mass_tolerance_unit_ppm = getStringOption_("NeighborSearch:pc_mass_tolerance_unit") == "ppm"; + int missed_cleavages = getIntOption_("NeighborSearch:missed_cleavages"); + int min_peptide_length = getIntOption_("NeighborSearch:min_peptide_length"); + // Create a ProteaseDigestion object for neighbor peptide digestion + // 
(it's not identical to the one used for creating decoys, because we need to consider missed cleavages) + ProteaseDigestion digestion_neighbor; + digestion_neighbor.setMissedCleavages(missed_cleavages); + if (! enzyme.empty()) { digestion_neighbor.setEnzyme(getStringOption_("enzyme").trim()); } + // Load the relevant proteins from 'NeighborSearch:in_relevant_proteins' + vector relevant_proteins; + FASTAFile().load(in_relevant_proteins, relevant_proteins); + + vector digested_relevant_peptides; + vector temp_peptides; + for (const auto& entry : relevant_proteins) { - OPENMS_LOG_FATAL_ERROR << "Given decoy string is not allowed. Please use one of the strings in DecoyHelper::affixes as either prefix or suffix (case insensitive): \n"; - return INCOMPATIBLE_INPUT_DATA; + digestion_neighbor.digest(AASequence::fromString(entry.sequence), temp_peptides, min_peptide_length); + digested_relevant_peptides.insert(digested_relevant_peptides.end(), make_move_iterator(temp_peptides.begin()), make_move_iterator(temp_peptides.end())); } + + NeighborSeq ns(std::move(digested_relevant_peptides)); + + // find neighbor peptides in 'in' for each relevant peptide in 'NeighborSearch:in_relevant_proteins' + for (Size i = 0; i < in.size(); ++i) + { + const auto x_residue = *ResidueDB::getInstance()->getResidue('X'); + FASTAFile fasta_in; + fasta_in.setLogType(log_type_); + fasta_in.readStartWithProgress(in[i], "Finding Neighbors in '" + in[i] + "'"); + FASTAFile::FASTAEntry entry; + vector digested_candidate_peptides; + while (fasta_in.readNextWithProgress(entry)) + { + digestion_neighbor.digest(AASequence::fromString(entry.sequence), digested_candidate_peptides, min_peptide_length); + entry.sequence.clear(); // reset sequence; later append valid candidates (if any) + entry.identifier = "neighbor_" + entry.identifier; + for (auto& peptide : digested_candidate_peptides) + { + if (peptide.has(x_residue)) + { // 'X' in peptide prevents us from computing a PC mass and a spectrum + continue; + } 
+ // Find relevant peptides for the current neighbor peptide candidate + bool is_neighbor_peptide = ns.isNeighborPeptide(peptide, mass_tolerance, mass_tolerance_unit_ppm, min_shared_ion_fraction, mz_bin_size); + if (!is_neighbor_peptide) continue; + entry.sequence += peptide.toString(); + } // next candidate peptide + if (!entry.sequence.empty()) + { + fasta_neighbor_out.writeNext(entry); + } + } // next candidate protein + } // next input file + + // we only need relevant and neighbor peptides in our final DB: + in.clear(); + // add relevant proteins FASTA file to the input list (to also create decoys for them) + in.push_back(in_relevant_proteins); + // add neighbor peptides FASTA file to the input list (to also create decoys for them) + in.push_back(out_neighbor); + + const auto stats = ns.getNeighborStats(); + OPENMS_LOG_INFO << "Neighbor peptide statistics for " << stats.total() << " reference peptides :\n" + << " - " << stats.unfindable() << " peptides contained an 'X' (unknown amino acid) and thus could not be searched for neighbors\n" + << " - " << stats.noNB() << " peptides had 0 neighbors\n" + << " - " << stats.oneNB() << " peptides had 1 neighbor\n" + << " - " << stats.multiNB() << " peptides had >=1 neighbors." 
<< endl; + } + + set identifiers; // spot duplicate identifiers // std::unordered_set has slightly more RAM, but slightly less CPU + + FASTAFile f; + f.writeStart(out); + + + FASTAFile fasta_out_relevant; /// in neighbor-peptide mode: write relevant peptides to the output file + if (neighbor_mode) + { + fasta_out_relevant.writeStart(out_relevant); } MRMDecoy m; m.setParameters(decoy_param); Math::RandomShuffler shuffler(seed); - for (Size i = 0; i < in.size(); ++i) + for (const auto& file_fasta : in) { - // check input files for decoys - FASTAContainer in_entries{in[i]}; - auto r = DecoyHelper::countDecoys(in_entries); - // if decoys found, throw exception - if (static_cast(r.all_prefix_occur + r.all_suffix_occur) >= 0.4 * static_cast(r.all_proteins_count)) - { - // if decoys found, program terminates with exit code 11 - OPENMS_LOG_FATAL_ERROR << "Invalid input in " + in[i] + ": Input file already contains decoys." << '\n'; - return INCOMPATIBLE_INPUT_DATA; - } + /// in neighbor-peptide mode: write relevant peptides to the output file + const bool write_relevant = neighbor_mode && file_fasta == in_relevant_proteins; + + f.readStart(file_fasta); + FASTAFile::FASTAEntry entry; + OpenMS::TargetedExperiment::Peptide p; - f.readStart(in[i]); //------------------------------------------------------------- // calculations //------------------------------------------------------------- @@ -229,25 +376,26 @@ class TOPPDecoyDatabase : if (append) { f.writeNext(entry); + if (write_relevant) + { + fasta_out_relevant.writeNext(entry); + } } - // identifier - entry.identifier = getIdentifier_(entry.identifier, decoy_string, decoy_string_position_prefix); + // new decoy identifier + entry.identifier = getDecoyIdentifier_(entry.identifier, decoy_string, decoy_string_position_prefix); - // sequence + // new decoy sequence if (input_type == SeqType::RNA) { string quick_seq = entry.sequence; bool five_p = (entry.sequence.front() == 'p'); bool three_p = (entry.sequence.back() == 'p'); 
- if (five_p) //we don't want to reverse terminal phosphates + if (five_p) // we don't want to reverse terminal phosphates { quick_seq.erase(0, 1); } - if (three_p) - { - quick_seq.pop_back(); - } + if (three_p) { quick_seq.pop_back(); } vector tokenized; std::smatch m; @@ -256,23 +404,20 @@ class TOPPDecoyDatabase : while (std::regex_search(quick_seq, m, re)) { - tokenized.emplace_back(m.str(0)); - quick_seq = m.suffix(); + tokenized.emplace_back(m.str(0)); + quick_seq = m.suffix(); } - if (shuffle) - { - shuffler.portable_random_shuffle(tokenized.begin(), tokenized.end()); - } - else // reverse + if (shuffle) { shuffler.portable_random_shuffle(tokenized.begin(), tokenized.end()); } + else // reverse { - reverse(tokenized.begin(), tokenized.end()); //reverse the tokens + reverse(tokenized.begin(), tokenized.end()); // reverse the tokens } - if (five_p) //add back 5' + if (five_p) // add back 5' { tokenized.insert(tokenized.begin(), String("p")); } - if (three_p) //add back 3' + if (three_p) // add back 3' { tokenized.emplace_back("p"); } @@ -288,26 +433,16 @@ class TOPPDecoyDatabase : String new_sequence = ""; for (auto const& peptide : peptides) { - //TODO why are the functions from TargetedExperiment and MRMDecoy not anywhere more general? - // No soul would look there. - if (shuffle) - { - OpenMS::TargetedExperiment::Peptide p; - p.sequence = peptide.toString(); - OpenMS::TargetedExperiment::Peptide decoy_p = m.shufflePeptide(p, identity_threshold, seed, max_attempts); - new_sequence += decoy_p.sequence; - } - else - { - OpenMS::TargetedExperiment::Peptide p; - p.sequence = peptide.toString(); - OpenMS::TargetedExperiment::Peptide decoy_p = MRMDecoy::reversePeptide(p, keepN, keepC, keep_const_pattern); - new_sequence += decoy_p.sequence; - } + p.sequence = peptide.toString(); + // TODO why are the functions from TargetedExperiment and MRMDecoy not anywhere more general? + // No soul would look there. + auto decoy_p = shuffle ? 
m.shufflePeptide(p, identity_threshold, seed, max_attempts) + : MRMDecoy::reversePeptide(p, keepN, keepC, keep_const_pattern); + new_sequence += decoy_p.sequence; } entry.sequence = new_sequence; } - else + else // no cleavage { // sequence if (shuffle) @@ -320,18 +455,23 @@ class TOPPDecoyDatabase : entry.sequence.reverse(); } } - } + } // protein entry //------------------------------------------------------------- // writing output //------------------------------------------------------------- f.writeNext(entry); - } // next protein - } // input files + // optional: if in neighbor mode: T+D of relevant peptides (if requested) + if (write_relevant) + { + fasta_out_relevant.writeNext(entry); + } + } // next protein + } // input files + return EXECUTION_OK; } - }; diff --git a/src/topp/OpenMSInfo.cpp b/src/topp/OpenMSInfo.cpp index e5b0f3acf5c..404ceccaa1e 100644 --- a/src/topp/OpenMSInfo.cpp +++ b/src/topp/OpenMSInfo.cpp @@ -51,8 +51,8 @@ To cite OpenMS: data. Nat Methods (2024). doi:10.1038/s41592-024-02197-7. << OpenMS Version >> -Version : 3.1.0 -Build time : Mar 4 2024, 10:42:53 +Version : 3.2.0 +Build time : Sep 18 2024, 14:14:53 Git sha1 : disabled Git branch : disabled @@ -111,7 +111,7 @@ class TOPPOpenMSInfo : public TOPPBase << underline(TOPPBase::getDocumentationURL()) << " " // the space is needed ... 
<< '\n' << bright("To cite OpenMS:\n") << " + " - << is.indent(3) << cite_openms_.toString() << is.indent(0); + << is.indent(3) << cite_openms.toString() << is.indent(0); is << "\n\n" << green("<< OpenMS Version >>\n") diff --git a/src/topp/SageAdapter.cpp b/src/topp/SageAdapter.cpp index 0f2cdf2de92..8feccf03b33 100644 --- a/src/topp/SageAdapter.cpp +++ b/src/topp/SageAdapter.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -29,9 +30,18 @@ #include #include +#include +#include +#include +#include +#include +#include + +#include using namespace OpenMS; using namespace std; +using boost::math::normal; //------------------------------------------------------------- //Doxygen docu @@ -72,8 +82,13 @@ because of limitations in OpenMS' data structures and file formats. /// @cond TOPPCLASSES -/* -*/ +#define CHRONOSET + +#ifndef M_PI + #define M_PI 3.14159265358979323846 +#endif + + class TOPPSageAdapter : public SearchEngineBase @@ -90,6 +105,486 @@ class TOPPSageAdapter : { } + // Saves details of PTMs as well, useful for if more than one PTM is mapped to a given mass + struct modification + { + double count = 0; + vector mass; + int numcharges = 0; + }; + + // Define a struct to hold modification data + struct ModData + { + int count; // Modification rate + String name; // Modification name + int numcharges; // Number of charges + vector masses; // Masses associated with the modification + }; + + // Comparator for approximate comparison of double values + struct FuzzyDoubleComparator { + double epsilon; + FuzzyDoubleComparator(double eps = 1e-9) : epsilon(eps) {} + bool operator()(const double& a, const double& b) const + { + return std::fabs(a - b) >= epsilon && a < b; + } + }; + +// delta mass counts to delta masses +//typedef map CountToDeltaMass; + +typedef map DeltaMassHistogram; // maps delta mass to count +typedef map DeltaMasstoCharge; // maps delta mass to count + +// Gaussian function +static double gaussian(double x, 
double sigma) { + return exp(-(x*x) / (2 * sigma*sigma)) / (sigma * sqrt(2 * M_PI)); +} + +// Smooths the PTM-mass histogram , uses a Kernel Density Estimation on top of the histogram. +// Smooths the PTM-mass histogram using Gaussian Kernel Density Estimation (KDE). +static DeltaMassHistogram smoothDeltaMassHist(const DeltaMassHistogram& hist, double sigma = 0.001) +{ + if (hist.size() < 3) + { + return hist; //Not enough data points for smoothing + } + // Create a smoothed histogram with a fuzzy comparator for floating-point keys + DeltaMassHistogram smoothed_hist(FuzzyDoubleComparator(1e-9)); + + // Extract delta masses and counts into vectors for efficient access + std::vector deltas; + std::vector counts; + deltas.reserve(hist.size()); + counts.reserve(hist.size()); + + for (const auto& [delta, count] : hist) + { + deltas.push_back(delta); + counts.push_back(count); + } + + const size_t n = deltas.size(); + std::vector smoothed_counts(n, 0.0); + + // Perform Gaussian smoothing + for (size_t i = 0; i < n; ++i) + { + double weight_sum = 0.0; + + for (size_t j = 0; j < n; ++j) + { + double mz_diff = deltas[i] - deltas[j]; + + // Ignore points beyond 3 standard deviations + if (std::abs(mz_diff) > 3.0 * sigma) + continue; + + double weight = gaussian(mz_diff, sigma); + smoothed_counts[i] += weight * counts[j]; + weight_sum += weight; + } + + if (weight_sum != 0.0) + { + smoothed_counts[i] /= weight_sum; + } + } + + // Populate the smoothed histogram + for (size_t i = 0; i < n; ++i) + { + smoothed_hist[deltas[i]] = smoothed_counts[i]; + } + + return smoothed_hist; +} + +// Identifies local maxima in the delta mass histogram based on count threshold and SNR. 
+static DeltaMassHistogram findPeaksInDeltaMassHistogram(const DeltaMassHistogram& hist, double count_threshold = 0.0, double SNR = 2.0) +{ + if (hist.size() < 3) + { + return hist; // Not enough data points to find peaks + } + + DeltaMassHistogram peaks(FuzzyDoubleComparator(1e-9)); + + // Extract counts to compute noise level (median count) + std::vector counts; + counts.reserve(hist.size()); + + for (const auto& [_, count] : hist) + { + counts.push_back(count); + } + + // Calculate median as noise level + std::nth_element(counts.begin(), counts.begin() + counts.size() / 2, counts.end()); + double noise_level = counts[counts.size() / 2]; + + // Convert histogram to vector for indexed access + std::vector> hist_vector(hist.begin(), hist.end()); + + // Check each point except the first and last for local maxima + for (size_t i = 1; i < hist_vector.size() - 1; ++i) + { + double prev_count = hist_vector[i - 1].second; + double curr_count = hist_vector[i].second; + double next_count = hist_vector[i + 1].second; + + // Check if current point is a local maximum + if (curr_count >= prev_count && curr_count >= next_count && + curr_count > count_threshold && + curr_count / noise_level > SNR) + { + peaks[hist_vector[i].first] = curr_count; + } + } + + return peaks; +} + +// Returns the maxima of a histogram from the delta masses of each peptide. 
+std::pair getDeltaClusterCenter(const std::vector& pips, bool smoothing = false, bool debug = false) +{ + // Constants + constexpr double deltamass_tolerance = 0.0005; + constexpr double delta_mass_zero_treshold = 0.05; + + // Lambda to round values to the specified tolerance + auto roundToTolerance = [deltamass_tolerance](double value) { + return std::round(value / deltamass_tolerance) * deltamass_tolerance; + }; + + // Data structures to store histogram and charge states + DeltaMassHistogram hist(FuzzyDoubleComparator(1e-9)); + DeltaMasstoCharge num_charges_at_mass(FuzzyDoubleComparator(1e-9)); + std::unordered_map> charge_states; + + // Process each peptide identification + for (const auto& id : pips) + { + const auto& hits = id.getHits(); + for (const auto& hit : hits) + { + // Retrieve delta mass and charge + double delta_mass = hit.getMetaValue("DeltaMass"); + int charge = hit.getCharge(); + + // Ignore delta masses close to zero + if (std::abs(delta_mass) <= delta_mass_zero_treshold) + continue; + + // Round delta mass to bin similar values + double rounded_mass = roundToTolerance(delta_mass); + + // Update histogram count + hist[rounded_mass] += 1.0; + + // Update unique charge count + if (charge_states[rounded_mass].insert(charge).second) + { + num_charges_at_mass[rounded_mass] += 1.0; + } + } + } + + // Prepare results + std::pair results; + results = { hist, num_charges_at_mass }; + + // Apply smoothing if requested + if (smoothing) + { + DeltaMassHistogram smoothed_hist = smoothDeltaMassHist(hist, 0.0001); + DeltaMassHistogram hist_maxima = findPeaksInDeltaMassHistogram(smoothed_hist, 0.0, 3.0); + + // Update charge counts for the smoothed maxima + DeltaMasstoCharge num_charges_at_mass_smoothed(FuzzyDoubleComparator(1e-9)); + for (const auto& [mass, _] : hist_maxima) + { + num_charges_at_mass_smoothed[mass] = num_charges_at_mass[mass]; + } + + // Update results with smoothed data + results = { hist_maxima, num_charges_at_mass_smoothed }; + } + + return 
results; +} + +//Fucntion that maps a selection of masses to certain PTMs and returns a summary of said PTMs. Also adds PTM for each petide without in-peptide localization. +vector mapDifftoMods(DeltaMassHistogram hist, DeltaMasstoCharge charge_hist, vector& pips, double precursor_mass_tolerance_ = 5, bool precursor_mass_tolerance_unit_ppm = true, String outfile = "") +{ + vector> clusters(hist.size(), vector()); + map mass_of_mods(FuzzyDoubleComparator(1e-9)); + vector> mass_of_mods_vec; + + // Load modifications from the database + vector searchmodifications_names; + ModificationsDB* mod_db = ModificationsDB::getInstance(); + mod_db->getAllSearchModifications(searchmodifications_names); + for (const String& m : searchmodifications_names) + { + const ResidueModification* residue = mod_db->getModification(m); + String res_name = residue->getFullName(); + double res_diffmonoMass = residue->getDiffMonoMass(); + if (res_name.find("substitution") == string::npos) + mass_of_mods[res_diffmonoMass] = res_name; + } + + // Generate combinations of modifications + map combo_mods(FuzzyDoubleComparator(1e-9)); + for (auto mit = mass_of_mods.begin(); mit != mass_of_mods.end(); ++mit) + { + for (auto mit2 = mit; mit2 != mass_of_mods.end(); ++mit2) + { + combo_mods[mit->first + mit2->first] = mit->second + "++" + mit2->second; + } + } + + // Variables for mapping + StringList modnames; + map modifications; + map hist_found; + + // Helper function to add or update modifications + auto addOrUpdateModification = [&](const String& mod_name, double mass, double count, int numcharges) + { + if (modifications.find(mod_name) == modifications.end()) + { + modification modi{}; + modi.mass.push_back(mass); + modi.count = count; + modi.numcharges = numcharges; + modifications[mod_name] = modi; + } + else + { + modifications[mod_name].count += count; + modifications[mod_name].numcharges = max(numcharges, modifications[mod_name].numcharges); + } + }; + + // Mapping with tolerances //TODO: fix 
code again, add back high_it + for (const auto& hist_entry : hist) + { + //Values from the histogram + double current_cluster_mass = hist_entry.first; + double count = hist_entry.second; + + double lowerbound, upperbound; + + const double epsilon = 1e-8; + + if (precursor_mass_tolerance_unit_ppm) // ppm + { + double tolerance = current_cluster_mass * precursor_mass_tolerance_ * 1e-6; + lowerbound = current_cluster_mass - tolerance; + upperbound = current_cluster_mass + tolerance; + } + else // Dalton + { + lowerbound = current_cluster_mass - precursor_mass_tolerance_; + upperbound = current_cluster_mass + precursor_mass_tolerance_; + } + + // Search for modifications within bounds + bool mapping_found = false; + String mod_name; + double mod_mass = 0.0; + + // Search in single modifications using lower_bound + auto it_lower = mass_of_mods.lower_bound(lowerbound - epsilon); + bool found_lower = false; + if (it_lower != mass_of_mods.end() && fabs(it_lower->first - current_cluster_mass) <= precursor_mass_tolerance_) + { + found_lower = true; + } + + // Search in single modifications using upper_bound + auto it_upper = mass_of_mods.upper_bound(upperbound + epsilon); + bool found_upper = false; + if (it_upper != mass_of_mods.begin()) + { + --it_upper; // Move to the largest element <= upperbound + if (fabs(it_upper->first - current_cluster_mass) <= precursor_mass_tolerance_) + { + found_upper = true; + } + } + + // Compare results from lower_bound and upper_bound + if (found_lower && found_upper) + { + if (it_lower->first == it_upper->first && it_lower->second == it_upper->second) + { + // Both methods found the same modification + mod_name = it_lower->second; + mod_mass = it_lower->first; + hist_found[mod_mass] = mod_name; + mapping_found = true; + } + else + { + // Different results from lower_bound and upper_bound + // Choose the closer one + mod_name = it_lower->second + "//" + it_upper->second; + mod_mass = current_cluster_mass; + hist_found[it_lower->first] = 
it_lower->second; + hist_found[it_upper->first] = it_upper->second; + mapping_found = true; + } + } + else + { + // Check if modification can be explained by known modifications + for (const auto& hit : hist_found) + { + if (fabs(hit.first - current_cluster_mass) < precursor_mass_tolerance_) + { + addOrUpdateModification(hit.second, hit.first, count, charge_hist[current_cluster_mass]); + mapping_found = true; + break; + } // Check if modification can be explained by a +1 Isotope variant of a known modification + else if (fabs((hit.first + 1) - current_cluster_mass) < precursor_mass_tolerance_) + { + String temp_mod_name = hit.second + "+1Da"; + addOrUpdateModification(temp_mod_name, hit.first + 1, count, charge_hist[current_cluster_mass]); + hist_found[hit.first + 1] = temp_mod_name; + mapping_found = true; + break; + } + } + // Search in combination modifications + if (!mapping_found) + { + auto it = combo_mods.lower_bound(current_cluster_mass - epsilon); + if (it != combo_mods.end() && fabs(it->first - current_cluster_mass) <= precursor_mass_tolerance_ / 10) + { + mod_name = it->second; + mod_mass = it->first; + mapping_found = true; + } + } + } + if (fabs(mod_mass) < precursor_mass_tolerance_) continue; //If the closest mod_mass is too close to 0, continue + + if (mapping_found) + { + modnames.push_back(mod_name); + addOrUpdateModification(mod_name, mod_mass, count, charge_hist[current_cluster_mass]); + } + else + { + // Unknown modification + String unknown_mod_name = "Unknown" + std::to_string(std::round(current_cluster_mass)); + addOrUpdateModification(unknown_mod_name, current_cluster_mass, count, charge_hist[current_cluster_mass]); + } + } + + // Collect all modification data into a vector + vector mods_by_count; + + //Fill vetcor + for (const auto& mod_pair : modifications) + { + ModData mod_data; + mod_data.count = std::round(mod_pair.second.count); + mod_data.name = mod_pair.first; + mod_data.numcharges = mod_pair.second.numcharges; + mod_data.masses = 
mod_pair.second.mass; + + mods_by_count.push_back(mod_data); + } + + // Sort the modifications based on (numcharges + rate) in descending order + sort(mods_by_count.begin(), mods_by_count.end(), + [](const ModData& a, const ModData& b) + { + return (a.numcharges + a.count) > (b.numcharges + b.count); + }); + + // Add the modifications to the output for each peptide + for (auto& id : pips) + { + auto& hits = id.getHits(); + for (auto& h : hits) + { + double deltamass = h.getMetaValue("DeltaMass"); + String PTM = ""; + + // Check if too close to zero + if (fabs(deltamass) < 0.05) + { + h.setMetaValue("PTM", PTM); + continue; + } + + bool found = false; + // Check with error tolerance if already present in histogram + for (const auto& mit : hist_found) + { + if (fabs(deltamass - mit.first) < precursor_mass_tolerance_) + { + PTM = mit.second; + found = true; + break; + } + } + //Otherwise assign unkwown + if (!found) + { + PTM = "Unknown" + String(deltamass); + } + h.setMetaValue("PTM", PTM); + } + } + // Remove 'idxml' from output file name and write the table + String output_tab = outfile.substr(0, outfile.size() - 5) + "_OutputTable.tsv"; + std::ofstream outfile_stream(output_tab); + + // Check if the file was opened successfully + if (!outfile_stream.is_open()) + { + std::cerr << "Error opening file: " << output_tab << std::endl; + // Handle the error appropriately, e.g., return or exit + return pips; // Assuming pips is the default return value + } + + outfile_stream << "Name\tMass\tModified Peptides (incl. 
charge variants)\tModified Peptides\n"; + + // Iterate over the data and write to the file + for (const auto& mod_data : mods_by_count) + { + outfile_stream << mod_data.name << '\t'; + + // Output mass or masses + if (mod_data.masses.size() < 2) + { + outfile_stream << mod_data.masses.at(0) << '\t'; + } + else + { + outfile_stream << mod_data.masses.at(0) << "/" << mod_data.masses.at(1) << '\t'; + } + + // Output rounded values + outfile_stream << mod_data.numcharges + mod_data.count << '\t' + << mod_data.count << '\n'; + } + + // Close the file + outfile_stream.close(); + + //Return the peptides with the additional PTM column + return pips; +} + + protected: // create a template-based configuration file for sage // variable values correspond to sage parameter that can be configured via TOPP tool parameter. @@ -141,7 +636,7 @@ class TOPPSageAdapter : }, "max_variable_mods": ##max_variable_mods##, "generate_decoys": false, - "decoy_tag": "##decoy_tag##" + "decoy_tag": "##decoy_prefix##" }, "precursor_tol": { "##precursor_tol_unit##": [ @@ -161,14 +656,14 @@ class TOPPSageAdapter : "isotope_errors": [ ##isotope_errors## ], - "deisotope": false, - "chimera": false, - "wide_window": false, - "predict_rt": false, + "deisotope": ##deisotope##, + "chimera": ##chimera##, + "predict_rt": ##predict_rt##, "min_peaks": ##min_peaks##, "max_peaks": ##max_peaks##, "min_matched_peaks": ##min_matched_peaks##, - "report_psms": ##report_psms## + "report_psms": ##report_psms##, + "wide_window": ##wide_window## } )"; @@ -177,7 +672,7 @@ class TOPPSageAdapter : { String origin; if (mod->getTermSpecificity() == ResidueModification::N_TERM) - { + { origin += "^"; } else if (mod->getTermSpecificity() == ResidueModification::C_TERM) @@ -244,7 +739,14 @@ class TOPPSageAdapter : config_file.substitute("##min_peaks##", String(getIntOption_("min_peaks"))); config_file.substitute("##max_peaks##", String(getIntOption_("max_peaks"))); config_file.substitute("##report_psms##", 
String(getIntOption_("report_psms"))); - config_file.substitute("##decoy_tag##", String(getStringOption_("decoy_prefix"))); + config_file.substitute("##deisotope##", getStringOption_("deisotope")); + config_file.substitute("##chimera##", getStringOption_("chimera")); + config_file.substitute("##predict_rt##", getStringOption_("predict_rt")); + config_file.substitute("##decoy_prefix##", getStringOption_("decoy_prefix")); + config_file.substitute("##wide_window##", getStringOption_("wide_window")); + + + //Look at decoy handling String enzyme = getStringOption_("enzyme"); String enzyme_details; @@ -320,16 +822,30 @@ class TOPPSageAdapter : { enzyme_details = R"("cleave_at": "")"; - } + } + else if (enzyme == "glutamyl endopeptidase") + { + enzyme_details = + R"("cleave_at": "E", + "restrict": "E", + "c_terminal":true)"; + } + else if (enzyme == "leukocyte elastase") + { + enzyme_details = + R"("cleave_at": "ALIV", + "restrict": null, + "c_terminal":true)"; + } config_file.substitute("##enzyme_details##", enzyme_details); + auto fixed_mods = getStringList_("fixed_modifications"); set fixed_unique(fixed_mods.begin(), fixed_mods.end()); fixed_mods.assign(fixed_unique.begin(), fixed_unique.end()); ModifiedPeptideGenerator::MapToResidueType fixed_mod_map = ModifiedPeptideGenerator::getModifications(fixed_mods); // std::unordered_map val; String static_mods_details = getModDetailsString(fixed_mod_map); - config_file.substitute("##static_mods##", static_mods_details); auto variable_mods = getStringList_("variable_modifications"); set variable_unique(variable_mods.begin(), variable_mods.end()); @@ -337,7 +853,34 @@ class TOPPSageAdapter : ModifiedPeptideGenerator::MapToResidueType variable_mod_map = ModifiedPeptideGenerator::getModifications(variable_mods); String variable_mods_details = getModDetailsString(variable_mod_map); - config_file.substitute("##variable_mods##", variable_mods_details); + //Treat variables as list for sage v0.15 and beyond + StringList 
static_mods_details_list; + StringList variable_mods_details_list; + + String static_mods_details_split = static_mods_details; + String variable_mods_details_split = variable_mods_details; + static_mods_details_split.split(",", static_mods_details_list); + variable_mods_details_split.split(",", variable_mods_details_list); + + String temp_String_var; + for (auto& x : variable_mods_details_list) + { + StringList temp_split; + x.split(":", temp_split); + + temp_split.insert(temp_split.begin()+1, ":["); + temp_split.insert(temp_split.end(), "]"); + String temp_split_Str = ""; + + for (auto& y : temp_split) + { + temp_split_Str = temp_split_Str + y; + } + temp_String_var = temp_String_var + "," + temp_split_Str ; + } + String temp_String_var_Fin = temp_String_var.substr(1, temp_String_var.size()-1); + config_file.substitute("##static_mods##", static_mods_details); + config_file.substitute("##variable_mods##", temp_String_var_Fin); return config_file; } @@ -405,6 +948,7 @@ class TOPPSageAdapter : "Can be negative. E.g. '-1,3' for considering '-1/0/1/2/3'", false, true); registerStringOption_("charges", "", charges_if_not_annotated, "Range of precursor charges to consider if not annotated in the file." , false, true); + //Search Enzyme vector all_enzymes; @@ -420,12 +964,22 @@ class TOPPSageAdapter : registerStringList_("variable_modifications", "", ListUtils::create("Oxidation (M)", ','), "Variable modifications, specified using Unimod (www.unimod.org) terms, e.g. 
'Carbamidomethyl (C)' or 'Oxidation (M)'", false); setValidStrings_("variable_modifications", all_mods); + //FDR and misc + + registerDoubleOption_("q_value_threshold", "", 1, "The FDR threshhold for filtering peptides", false, false); + registerStringOption_("annotate_matches", "", "true", "If the matches should be annotated (default: false),", false, false); + registerStringOption_("deisotope", "", "false", "Sets deisotope option (true or false), default: false", false, false ); + registerStringOption_("chimera", "", "false", "Sets chimera option (true or false), default: false", false, false ); + registerStringOption_("predict_rt", "", "false", "Sets predict_rt option (true or false), default: false", false, false ); + registerStringOption_("wide_window", "", "false", "Sets wide_window option (true or false), default: false", false, false); + registerStringOption_("smoothing", "", "true", "Should the PTM histogram be smoothed and local maxima be picked. If false, uses raw data, default: false", false, false); + registerIntOption_("threads", "", 1, "Amount of threads available to the program", false, false); + // register peptide indexing parameter (with defaults for this search engine) registerPeptideIndexingParameter_(PeptideIndexing().getParameters()); } - ExitCodes main_(int, const char**) override { //------------------------------------------------------------- @@ -434,6 +988,7 @@ class TOPPSageAdapter : // do this early, to see if Sage is installed String sage_executable = getStringOption_("sage_executable"); + std::cout << sage_executable << " sage executable" << std::endl; String proc_stdout, proc_stderr; TOPPBase::ExitCodes exit_code = runExternalProcess_(sage_executable.toQString(), QStringList() << "--help", proc_stdout, proc_stderr, ""); auto major_minor_patch = getVersionNumber_(proc_stdout); @@ -469,20 +1024,52 @@ class TOPPSageAdapter : debug_config_stream.close(); } + String annotation_check; + QStringList arguments; + + if ( 
(getStringOption_("annotate_matches").compare("true")) == 0) + { arguments << config_file.toQString() << "-f" << fasta_file.toQString() << "-o" << output_folder.toQString() - << "--write-pin"; + << "--annotate-matches" + << "--write-pin"; + } + else + { + arguments << config_file.toQString() + << "-f" << fasta_file.toQString() + << "-o" << output_folder.toQString() + << "--write-pin"; + } + if (batch >= 1) arguments << "--batch-size" << QString(batch); for (auto s : input_files) arguments << s.toQString(); OPENMS_LOG_INFO << "Sage command line: " << sage_executable << " " << arguments.join(' ').toStdString() << std::endl; + + //std::chrono lines for testing/writing purposes only! + + #ifdef CHRONOSET + std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); + // Sage execution with the executable and the arguments StringList + exit_code = runExternalProcess_(sage_executable.toQString(), arguments); + std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); + std::cout << "Time difference = " << std::chrono::duration_cast(end - begin).count() << "[s]" << std::endl; + #endif + #ifndef CHRONOSET // Sage execution with the executable and the arguments StringList - exit_code = runExternalProcess_(sage_executable.toQString(), arguments); + exit_code = runExternalProcess_(sage_executable.toQString(), arguments); + #endif + + + + if (exit_code != EXECUTION_OK) { + std::cout << "Sage executable not found" << std::endl; return exit_code; } @@ -495,16 +1082,19 @@ class TOPPSageAdapter : StringList filenames; StringList extra_scores = {"ln(-poisson)", "ln(delta_best)", "ln(delta_next)", "ln(matched_intensity_pct)", "longest_b", "longest_y", - "longest_y_pct", "matched_peaks", "scored_candidates"}; + "longest_y_pct", "matched_peaks", "scored_candidates"}; + double FDR_threshhold = getDoubleOption_("q_value_threshold"); + vector peptide_identifications = PercolatorInfile::load( output_folder + "/results.sage.pin", true, 
"ln(hyperscore)", extra_scores, filenames, - decoy_prefix); + decoy_prefix, + FDR_threshhold, + true); - // rename SAGE subscores to have prefix "SAGE:" for (auto& id : peptide_identifications) { auto& hits = id.getHits(); @@ -515,12 +1105,17 @@ class TOPPSageAdapter : if (h.metaValueExists(meta)) { h.setMetaValue("SAGE:" + meta, h.getMetaValue(meta)); - h.removeMetaValue(meta); - } + h.removeMetaValue(meta); + } } } } + + String smoothing_string = getStringOption_("smoothing"); + bool smoothing = !(smoothing_string.compare("true")); + const pair resultsClus = getDeltaClusterCenter(peptide_identifications, smoothing, false); + vector mapD = mapDifftoMods(resultsClus.first, resultsClus.second, peptide_identifications, 0.01, false, output_file); //peptide_identifications; // remove hits without charge state assigned or charge outside of default range (fix for downstream bugs). TODO: remove if all charges annotated in sage IDFilter::filterPeptidesByCharge(peptide_identifications, 2, numeric_limits::max()); @@ -611,7 +1206,6 @@ class TOPPSageAdapter : it->second.emplace(nr,nID); } } - } } @@ -639,12 +1233,9 @@ class TOPPSageAdapter : { } } - IdXMLFile().store(output_file, protein_identifications, peptide_identifications); - return EXECUTION_OK; } - }; diff --git a/tools/ACTIVE_MAINTAINERS b/tools/ACTIVE_MAINTAINERS index 690a9c849d5..327ec10964f 100644 --- a/tools/ACTIVE_MAINTAINERS +++ b/tools/ACTIVE_MAINTAINERS @@ -1,8 +1,11 @@ Chris Bielow -Eugen Netz Julianus Pfeuffer -Hannes Roest Timo Sachsenberg -Hendrik Weisser -Oliver Kohlbacher -Knut Reinert +Samuel Wein +Kyowon Jeong +Axel Walter +Tom Mueller +Matteo Pilz +Arslan Siraj +Ayesha Feroz +Josh Charkow diff --git a/tools/update_ini_files_OpenMS_version.sh b/tools/update_ini_files_OpenMS_version.sh index f59412e5e36..f9007a685e5 100755 --- a/tools/update_ini_files_OpenMS_version.sh +++ b/tools/update_ini_files_OpenMS_version.sh @@ -1,2 +1,2 @@ TOOL_DIR_PATH="./src/tests/topp/" -find $TOOL_DIR_PATH -type f -iname 
'*.ini' -exec grep -q '