diff --git a/.github/workflows/knime_tests.yml b/.github/workflows/knime_tests.yml index 41b2fc199a5..7136a88cce9 100644 --- a/.github/workflows/knime_tests.yml +++ b/.github/workflows/knime_tests.yml @@ -1,13 +1,17 @@ name: 'Test KNIME workflows' on: workflow_dispatch: - + inputs: + updateURL: + type: string + description: use a specific location for the knime update site + default: https://abibuilder.cs.uni-tuebingen.de/archive/openms/knime-plugin/updateSite/nightly/ jobs: test-knime: env: GH_TOKEN: ${{ github.token }} - KNIME_MAJOR_VERSION: 4 - KNIME_MINOR_VERSION: 7 + KNIME_MAJOR_VERSION: 5 + KNIME_MINOR_VERSION: 3 INSTALLATION_DIR: ${{ github.workspace }} runs-on: ubuntu-latest steps: @@ -37,7 +41,7 @@ jobs: - name: Install OpenMS plugin run: | "$KNIME_DIR/knime" -application org.eclipse.equinox.p2.director \ - -r "http://update.knime.com/analytics-platform/${KNIME_VERSION},https://abibuilder.cs.uni-tuebingen.de/archive/openms/knime-plugin/updateSite/nightly/" \ + -r "http://update.knime.com/analytics-platform/${KNIME_VERSION},${{ inputs.updateURL }}" \ -p2.arch x86_64 \ -profileProperties org.eclipse.update.install.features=true \ -i "de.openms.feature.feature.group,com.genericworkflownodes.knime.feature.feature.group,de.openms.thirdparty.feature.feature.group" \ diff --git a/.github/workflows/openms_ci_matrix_full.yml b/.github/workflows/openms_ci_matrix_full.yml index 49debe090ff..56fcaf1bb8e 100644 --- a/.github/workflows/openms_ci_matrix_full.yml +++ b/.github/workflows/openms_ci_matrix_full.yml @@ -174,8 +174,8 @@ jobs: fi echo "version_number=$VERSION_NUMBER" >> $GITHUB_OUTPUT grep -ne "----[[:space:]]*OpenMS" ${{ github.workspace }}/OpenMS/CHANGELOG > index_changelog.txt - START=$(cat index_changelog.txt | grep -A 1 -e " $VERSION_NUMBER " | cut -f1 -d: | head -1) - END=$(cat index_changelog.txt | grep -A 1 -e " $VERSION_NUMBER " | cut -f1 -d: | tail -1) + START=$(cat index_changelog.txt | grep -A 1 -E " $VERSION_NUMBER(\.0)? 
" | cut -f1 -d: | head -1) + END=$(cat index_changelog.txt | grep -A 1 -E " $VERSION_NUMBER(\.0)? " | cut -f1 -d: | tail -1) echo "Extracting between lines:" echo $START echo $END @@ -265,8 +265,10 @@ jobs: fi if [[ "${{ matrix.os }}" == macos-* ]]; then + ## Update the package lists for Brew + brew update ## Needed for Qt. Install before to overwrite the default softlinks on the GH runners - brew install python3 --force --overwrite + brew install python@3.12 --force --overwrite brew install --quiet ccache autoconf automake libtool ninja && brew link --overwrite ccache brew install libsvm xerces-c boost eigen sqlite coinutils cbc cgl clp qt echo "cmake_prefix=$(brew --prefix qt)/lib/cmake;$(brew --prefix qt)" >> $GITHUB_OUTPUT @@ -454,7 +456,7 @@ jobs: type: 'tar' directory: ${{ github.workspace }} path: OpenMS - exclusions: 'bld/* ./OpenMS-${{ steps.create_changelog.outputs.version_number }}.tar.gz THIRDPARTY/* .git/*' + exclusions: 'bld/* ./OpenMS-${{ steps.create_changelog.outputs.version_number }}.tar.gz ./THIRDPARTY/** .git/*' filename: 'OpenMS-${{ steps.create_changelog.outputs.version_number }}.tar.gz' @@ -535,7 +537,7 @@ jobs: - name: Download source archive as artifact uses: actions/download-artifact@v4 with: - name: OpenMS-${{ steps.create_changelog.outputs.version_number }}.tar.gz + name: OpenMS-${{ needs.build-and-test.outputs.version_number }}.tar.gz - name: Download changelog as artifact if: inputs.do_release @@ -583,7 +585,7 @@ jobs: mkdir -p ~/.ssh/ echo "$PASS" > ~/.ssh/private.key sudo chmod 600 ~/.ssh/private.key - ln -s ./$folder latest #create link to the release folder + ln -s ../$folder latest #create link to the release folder rsync --progress -avz -e "ssh -i ~/.ssh/private.key -o StrictHostKeyChecking=no" latest "$USER@$HOST:/OpenMSInstaller/release" - name: create RELEASE_TEXT @@ -663,7 +665,7 @@ jobs: if [[ "${{ github.ref_name }}" == "nightly" ]]; then folder=nightly elif [[ "${{ github.ref_name }}" == release/* ]]; then - 
folder=release/${{ github.ref_name }} + folder=${{ github.ref_name }} else folder=experimental/${{ github.ref_name }} fi @@ -694,7 +696,7 @@ jobs: echo "$PASS" > ~/.ssh/private.key sudo chmod 600 ~/.ssh/private.key ln -s ./$folder latest #we can use the same link from above. - rsync --progress -avz -e "ssh -i ~/.ssh/private.key -o StrictHostKeyChecking=no" latest "$USER@$HOST:/Documentation/release + rsync --progress -avz -e "ssh -i ~/.ssh/private.key -o StrictHostKeyChecking=no" latest "$USER@$HOST:/Documentation/release" # TODO create softlinks to latest nightly # TODO create and upload file hashes, at least for release candidate @@ -702,7 +704,7 @@ jobs: build-deploy-knime-updatesite: env: - KNIME: 5.1 + KNIME: 5.3 JAVA_VER: 17 PLUGIN_BUILD: ${{ github.workspace }}/plugin-build PLUGIN_SOURCE: ${{ github.workspace }}/plugin-source @@ -869,7 +871,7 @@ jobs: if [[ "${{ github.ref_name }}" == "nightly" ]]; then folder=nightly elif [[ "${{ github.ref_name }}" == release/* ]]; then - folder=release/${{ github.ref_name }} + folder=${{ github.ref_name }} else folder=experimental/${{ github.ref_name }} fi @@ -906,7 +908,7 @@ jobs: echo "$PASS" > ~/.ssh/private.key sudo chmod 600 ~/.ssh/private.key ln -s ./$folder latest #create link to the release folder - rsync --progress -avz -e "ssh -i ~/.ssh/private.key -o StrictHostKeyChecking=no" latest "$USER@$HOST:/knime-plugin/updateSite/release + rsync --progress -avz -e "ssh -i ~/.ssh/private.key -o StrictHostKeyChecking=no" latest "$USER@$HOST:/knime-plugin/updateSite/release" do-release: if: inputs.do_release diff --git a/.github/workflows/pyopenms-wheels.yml b/.github/workflows/pyopenms-wheels.yml index 244b4ab1de1..d44aaba9933 100644 --- a/.github/workflows/pyopenms-wheels.yml +++ b/.github/workflows/pyopenms-wheels.yml @@ -135,7 +135,7 @@ jobs: conda remove --name "pyoms-bld-${pynodot}" --all # clean previous pyopenms libs - find . -name "pyopenms*.so" -exec rm -rf {} \; + find . 
-name "_pyopenms*.pyd" -exec rm -rf {} \; done @@ -181,8 +181,10 @@ jobs: - name: Install contrib packages from brew run: | + ## Update the package lists for Brew + brew update ## Needed for Qt. Install before to overwrite the default softlinks on the GH runners - brew install python3 --force --overwrite + brew install python@3.12 --force --overwrite brew install --quiet ccache autoconf automake libtool ninja && brew link --overwrite ccache brew install libsvm xerces-c boost eigen sqlite coinutils cbc cgl clp qt@5 libomp echo "cmake_prefix=$(brew --prefix qt@5)/lib/cmake;$(brew --prefix qt@5)" >> $GITHUB_OUTPUT @@ -240,7 +242,7 @@ jobs: conda remove --name pyoms-bld-"${pynodot}" --all # clean previous pyopenms libs - find . -name "pyopenms*.so" -exec rm -rf {} \; + find . -name "_pyopenms*.so" -exec rm -rf {} \; done @@ -295,8 +297,10 @@ jobs: - name: Install contrib packages from brew run: | + ## Update the package lists for Brew + brew update ## Needed for Qt. Install before to overwrite the default softlinks on the GH runners - brew install python3 --force --overwrite + brew install python@3.12 --force --overwrite brew install --quiet ccache autoconf automake libtool ninja && brew link --overwrite ccache brew install libsvm xerces-c boost eigen sqlite coinutils cbc cgl clp qt@5 libomp echo "cmake_prefix=$(brew --prefix qt@5)/lib/cmake;$(brew --prefix qt@5)" >> $GITHUB_OUTPUT @@ -354,7 +358,7 @@ jobs: conda remove --name pyoms-bld-"${pynodot}" --all # clean previous pyopenms libs - find . -name "pyopenms*.so" -exec rm -rf {} \; + find . 
-name "_pyopenms*.so" -exec rm -rf {} \; done @@ -592,6 +596,6 @@ jobs: uses: pypa/gh-action-pypi-publish@release/v1 with: user: __token__ - repository-url: https://test.pypi.org/ + repository-url: https://pypi.org/ password: ${{ secrets.pypi_api_token_release }} packages-dir: ${{ github.workspace }}/wheels diff --git a/.github/workflows/update_version_numbers.yml b/.github/workflows/update_version_numbers.yml index 75361952e52..5d4970d087c 100644 --- a/.github/workflows/update_version_numbers.yml +++ b/.github/workflows/update_version_numbers.yml @@ -16,7 +16,7 @@ jobs: runs-on: macos-latest steps: # Getting files (OpenMS) - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 # Update files with new package version numbers - name: update files @@ -40,6 +40,9 @@ jobs: # update test write ini out: sed -i '' "s# diff --git a/AUTHORS b/AUTHORS index 6db5bc54822..62cf728604e 100644 --- a/AUTHORS +++ b/AUTHORS @@ -53,6 +53,7 @@ the authors tag in the respective file header. - Johan Teleman - Johannes Junker - Johannes Veit + - Johannes von Kleist - Joshua Charkow - Julia Thueringer - Juliane Schmachtenberg @@ -91,6 +92,7 @@ the authors tag in the respective file header. - Radu Suciu - Ralf Gabriels - Rene Hussong + - Rick Helmus - Ruben Grünberg - Samuel Wein - Sandro Andreotti diff --git a/CHANGELOG b/CHANGELOG index 1cc42497929..429223b0795 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,4 @@ - - Glossary: OpenMS - Name of the project and our C++ library TOPP - "The OpenMS PiPeline", collection of chainable tools for flexible HPLC/MS workflows. Formerly known as "The OpenMS Proteomics Pipeline" @@ -13,21 +11,42 @@ PR - Pull Request (on GitHub), i.e. integration of a new feature or bugfix #, e.g. #4957 - a reference to an issue or pull request on GitHub, visit e.g. 
https://github.com/OpenMS/OpenMS/pull/XXXX (replace XXXX with number of interest) for details ------------------------------------------------------------------------------------------ ----- OpenMS 3.2.0 (under development) ---- +---- OpenMS 3.3.0 (under development) ---- +------------------------------------------------------------------------------------------ + +What's new: +- Changes breaking backwards compatibility: + - the QualityControl TOPP tool has some renamed parameters and supports output directories (#7497) + +Misc: +- FileInfo: + - support MzTab files (#7568) +- TOPPAS + - supports writing results to output directories (for TOPP tools which have such parameters) (#7497) + - TOPPAS tutorial enhanced (#7497) +- FeatureFinderMetabo + - added report_smoothed_intensities parameter (#7594) + +------------------------------------------------------------------------------------------ +---- OpenMS 3.2.0 (released 09/2024) ---- ------------------------------------------------------------------------------------------ What's new: - Changes breaking backwards compatibility: - Rename of parameters for TOPP tool FeatureFinderCentroided (debug -> advanced), and PeakPickerWavelet/TOFCalibration (optimization -> optimization:type) (#7154) - Rename of parameters for TOPP tool IDFilter (score:pep -> score:psm; score:prot -> score:protein; score:protgroup -> score:proteingroup) with 'nan' as new default (#7541) + - 3.2.0 KNIME package requires KNIME 5.3 or later +- Support for SubsetNeighborSearch (SNS) via DecoyDatabase (#7565) +- SageAdapter received large updates including added functionality for PTM discovery + enabling features such as chimera seach, RT prediction, filtering by q-value, etc. Library: - Extend FileHandler to support load and store operations for our major datastructures (spectra, features, identifications, etc.). Replaced file type specific code with the more generic FileHandler calls to decouple the IO code from other parts of the library. 
- SiriusAdapter reworked to SiriusExport: Instead of running SIRIUS directly, this reworked tool takes multiple mzML and feautureXML (optional) files exporting a single SIRIUS .ms input file as well as an input table with compound info from features for the new AssayGeneratorMetaboSirius tool. (#7234) - Splitting AssayGeneratorMetabo into two tools: In line with the changes to SiriusExport this tool has been split into two separate workflows. AssayGeneratorMetabo generates an assay library from mzML and feautreXML files using an heuristic approach picking the highest intensity MS2 peaks (like before). AssayGeneratorMetaboSirius takes an existing SIRIUS project directory as input to generate an assay library based on fragmentation trees. (#7234) - better documentation for all SpectraFilter... tools (#7183) -- TOPPView: offer Ion mobility view from 2D spectra view (#7423) -- TOPPView: view ion mobility frames, irrespective of its MS level (formerly only MS1 was supported) (#7427) +- TOPPView: + - offer Ion mobility view from 2D spectra view (#7423) + - view ion mobility frames, irrespective of its MS level (formerly only MS1 was supported) (#7427) - OpenSwath: Add output on peak shape metrics to .osw file (#7222) New Tools: @@ -35,20 +54,26 @@ New Tools: - AssayGeneratorMetaboSirius -- Assay library generation from a SIRIUS project directory (Metabolomics) - SiriusExport -- Metabolite identification using single and tandem mass spectrometry + Fixes: - FileConverter: more robust (#7176) - MSFragger: allow relative path to database (#7155) - MSGFPlusAdapter: allow concurrent creation of indexed database (#7272) - CometAdapter: work around bug in Comet 2024.01 rev. 
0 to avoid empty results (#7540) - ParamEditor: fixed error for the subsection parameter (ParamNode) to go through store function (#7180) -- TOPPView: fix crash when viewing certain Chromatograms (#7220) -- TOPPView: in 2D view, show correct adjacent layers in context menu, if user clicked to the right of the last MS1 scan (now shows the 4 rightmost MS1 scans, used to show the 4 leftmost scans) (#7423) +- TOPPView: + - fix crash when viewing certain Chromatograms (#7220) + - in 2D view, show correct adjacent layers in context menu, if user clicked to the right of the last MS1 scan (now shows the 4 rightmost MS1 scans, used to show the 4 leftmost scans) (#7423) + - fix glitches in 1D view and layer names (#7549) + - Show prefix ions (e.g. b1) when generating theoretical spectra (#7567) - TOPPAS: open files in TOPPView (#7213) - pyOpenMS: Log warnings in pure Python code with warnings.warn instead of print (#7418) - more robust parsing of mzIdentML (#7153) +- SageAdapter now works with sage v0.15.0 and beyond +- OpenSwath: Fix bug in diaPASEF window determination (#7546) Misc: -- FileInfo: Report IM ranges (if any) (#7459) +- FileInfo: Report ion mobility ranges (if any) (#7459) - OpenMSInfo reports the ILP solver (CoinOr or glpk) (#7156) - add citation information for OpenMS 3.0 (Nat. 
Methods) (#7383) - Add export for Common Workflow Language (CWL) (#6156) diff --git a/CMakeLists.txt b/CMakeLists.txt index f5d8f303198..6b2aa37cf15 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -105,7 +105,7 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/Windows") #------------------------------------------------------------------------------ set(OPENMS_PACKAGE_VERSION_MAJOR "3") -set(OPENMS_PACKAGE_VERSION_MINOR "2") +set(OPENMS_PACKAGE_VERSION_MINOR "3") set(OPENMS_PACKAGE_VERSION_PATCH "0") set(OPENMS_PACKAGE_VERSION "${OPENMS_PACKAGE_VERSION_MAJOR}.${OPENMS_PACKAGE_VERSION_MINOR}.${OPENMS_PACKAGE_VERSION_PATCH}") diff --git a/THIRDPARTY b/THIRDPARTY index d6594eb775e..6c4aff683f5 160000 --- a/THIRDPARTY +++ b/THIRDPARTY @@ -1 +1 @@ -Subproject commit d6594eb775ebca0255f0129d946d7e582b37ac37 +Subproject commit 6c4aff683f5d6cf209a240a4e29fae7be16ce1ad diff --git a/cmake/OpenMSConfig.cmake.in b/cmake/OpenMSConfig.cmake.in index c5668336605..cd403ea8fda 100644 --- a/cmake/OpenMSConfig.cmake.in +++ b/cmake/OpenMSConfig.cmake.in @@ -50,6 +50,7 @@ endif() #TODO somehow add the same/compatible versions that were found by OpenMS? And what about static vs dynamic? E.g. if we link to static zlib in OpenMS(.dll) what (if at all) can/should the consumer link against? find_dependency(Qt6 @QT_MIN_VERSION@ COMPONENTS @OpenMS_QT_COMPONENTS@) find_dependency(XercesC) +find_dependency(Eigen3 3.3.4) find_dependency(LIBSVM 2.91) # Rest are private linked libraries @@ -58,7 +59,6 @@ find_dependency(LIBSVM 2.91) #find_dependency(GLPK) #find_dependency(ZLIB) #find_dependency(BZip2) -#find_dependency(Eigen3 3.3.4) #find_dependency(SQLite3 3.15.0) #find_dependency(HDF5) diff --git a/cmake/Windows/ReleaseNotes.txt b/cmake/Windows/ReleaseNotes.txt index 2252884e06a..83e55daee59 100644 --- a/cmake/Windows/ReleaseNotes.txt +++ b/cmake/Windows/ReleaseNotes.txt @@ -5,49 +5,23 @@ the OpenMS library. 
several small applications that can be chained to create analysis pipelines tailored for a specific problem. -Note that the Windows binary release is meant for end-users only, rather than -for developers who want to contribute algorithms. Developers should download +Note that this Windows binary release is meant for end-users only, rather than +for developers who want to contribute algorithms. Developers should download the source code version (see https://www.openms.de for details). ====================== Read the documentation ====================== -To get familiar with TOPP we recommend reading the documentation. Look into -your Start-Menu (Start - Programs - OpenMS) and open the Documentation page. -On the left side you will see the TOPP section. Read it after finishing with -this document! Ignore any technical details; these are meant for developers. -Especially you should read the "TOPP tutorial" and "TOPP documentation". - -The TOPP tools can be called from two graphical -applications (GUI) which are part of this installation; either "TOPPView", or "TOPPAS". -See the documentation for a tutorial on how to use TOPPView and TOPPAS. -Pipelines can also be created this way. -Advanced users can also use scripts and the Windows command line. Pipelines can be run from -a GUI tool or via the "ExecutePipeline" command line tool. - -The INIFileEditor allows you to edit .ini-Files, which are XML configuration -files that every TOPP tool can use to adjust its parameters. - -======================== -The Windows command line -======================== - -This part is usually not necessary for the common user, even if you are advanced. -However, special requirements (e.g. automated conversion tools, webserver or cluster integration) -might require you to use the command line. -To get familiar with the Windows command line, we recommend reading one of -many tutorials available on the internet -(e.g. http://www.voidspace.org.uk/python/articles/command_line.shtml). 
The -most useful command you will need are 'cd' (change into a directory) and 'dir' -(list the content of the current directory). You can get further information -on those commands by typing "help cd" or "help dir". Typing "help" alone will -give you all available commands. You will find a link in the OpenMS start -menu group, which opens a Windows command line. Once the command line is -open, you can type 'dir' to see the names of the TOPP tools, which reside in -the directory. You can just call a TOPP tool without any further arguments - -it will print some useful information and give you hints on how to use the -TOPP tool. Note that you can call the TOPP tools from any directory, i.e. you -do not have to be in "C:\program files\OpenMS\TOPP" to call the FeatureFinder -tool. + +To get familiar with TOPP we recommend reading the documentation. +If you installed the documention that came with this installer, +look into your Windows Start-Menu and start typing "OpenMS Documentation". +Autocompletion will do the rest... + +You can also visit our online documentation at https://openms.de/current_doxygen/, which is identical to the +local docs (if you installed them). 
+ +Another good source of information with an introduction to the wider OpenMS ecosystem can be found at +https://openms.readthedocs.io/en/latest/getting-started/introduction.html diff --git a/cmake/build_system_macros.cmake b/cmake/build_system_macros.cmake index 16c112d6478..a9e7fd6cee0 100644 --- a/cmake/build_system_macros.cmake +++ b/cmake/build_system_macros.cmake @@ -60,35 +60,9 @@ macro(find_boost) "1.77.1" "1.77.0" "1.77" "1.76.1" "1.76.0" "1.76" "1.75.1" "1.75.0" "1.75" - "1.74.1" "1.74.0" "1.74" - "1.73.1" "1.73.0" "1.73" - "1.72.1" "1.72.0" "1.72" - "1.71.1" "1.71.0" "1.71" - "1.70.1" "1.70.0" "1.70" - "1.69.1" "1.69.0" "1.69" - "1.68.1" "1.68.0" "1.68" - "1.67.1" "1.67.0" "1.67" - "1.66.1" "1.66.0" "1.66" - "1.65.1" "1.65.0" "1.65" - "1.64.1" "1.64.0" "1.64" - "1.63.1" "1.63.0" "1.63" - "1.62.1" "1.62.0" "1.62" - "1.61.1" "1.61.0" "1.61" - "1.60.1" "1.60.0" "1.60" - "1.59.1" "1.59.0" "1.59" - "1.58.1" "1.58.0" "1.58" - "1.57.1" "1.57.0" "1.57" - "1.56.1" "1.56.0" "1.56" - "1.55.1" "1.55.0" "1.55" - "1.54.1" "1.54.0" "1.54" - "1.53.1" "1.53.0" "1.53" - "1.52.1" "1.52.0" "1.52" - "1.51.1" "1.51.0" "1.51" - "1.50.1" "1.50.0" "1.50" - "1.49.1" "1.49.0" "1.49" - "1.48.1" "1.48.0" "1.48") + "1.74.1" "1.74.0" "1.74") - find_package(Boost 1.48.0 COMPONENTS ${ARGN} REQUIRED) + find_package(Boost 1.74.0 COMPONENTS ${ARGN} REQUIRED) endmacro(find_boost) diff --git a/cmake/cmake_findExternalLibs.cmake b/cmake/cmake_findExternalLibs.cmake index aa30ec8a7cd..891dc20ce39 100644 --- a/cmake/cmake_findExternalLibs.cmake +++ b/cmake/cmake_findExternalLibs.cmake @@ -200,12 +200,6 @@ ELSE() ENDIF() - -# see https://github.com/ethereum/solidity/issues/4124 -if("${Boost_MAJOR_VERSION}.${Boost_MINOR_VERSION}" VERSION_LESS "1.59") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBOOST_VARIANT_USE_RELAXED_GET_BY_DEFAULT") -endif() - #------------------------------------------------------------------------------ # PTHREAD 
#------------------------------------------------------------------------------ diff --git a/doc/doxygen/images/TOPPAS_edges.png b/doc/doxygen/images/TOPPAS_edges.png index b780f1f63e4..34cc5d39b78 100644 Binary files a/doc/doxygen/images/TOPPAS_edges.png and b/doc/doxygen/images/TOPPAS_edges.png differ diff --git a/doc/doxygen/images/TOPPAS_parameters.png b/doc/doxygen/images/TOPPAS_parameters.png index d1b9c302cc0..7e88e2aba7e 100644 Binary files a/doc/doxygen/images/TOPPAS_parameters.png and b/doc/doxygen/images/TOPPAS_parameters.png differ diff --git a/doc/doxygen/images/TOPPAS_qc.png b/doc/doxygen/images/TOPPAS_qc.png new file mode 100644 index 00000000000..03de38a42a8 Binary files /dev/null and b/doc/doxygen/images/TOPPAS_qc.png differ diff --git a/doc/doxygen/images/TOPPAS_recycling.png b/doc/doxygen/images/TOPPAS_recycling.png new file mode 100644 index 00000000000..052531afcee Binary files /dev/null and b/doc/doxygen/images/TOPPAS_recycling.png differ diff --git a/doc/doxygen/images/TOPPAS_simple_example.png b/doc/doxygen/images/TOPPAS_simple_example.png index 2510a20de6c..c4559caeb95 100644 Binary files a/doc/doxygen/images/TOPPAS_simple_example.png and b/doc/doxygen/images/TOPPAS_simple_example.png differ diff --git a/doc/doxygen/install/install-linux.doxygen b/doc/doxygen/install/install-linux.doxygen index fd0caf25a28..29cfc395715 100644 --- a/doc/doxygen/install/install-linux.doxygen +++ b/doc/doxygen/install/install-linux.doxygen @@ -53,7 +53,7 @@
  • For the complete feature set to be enabled, %OpenMS needs recent versions of - \b Boost (>= 1.48), \b Eigen3 (>= 3.3.2), \b libHDF5, \b libSVM (2.91 or higher but not 3.15), + \b Boost (>= 1.74), \b Eigen3 (>= 3.4.0), \b libHDF5, \b libSVM (2.91 or higher but not 3.15), \b glpk (>= 4.45) or \b CoinMP (>= 1.3.3), \b zlib, \b libbz2, and \b Xerces-C (>= 3.1.1). These should be built by our contrib build script in case they are not already installed via your package manager.
  • @@ -84,7 +84,7 @@ # Get REQUIRED installable contrib libraries sudo yum install cmake3 qt6-qtbase-gui qt6-qtbase-devel qt6-qtsvg-devel python-devel # Get OPTIONAL installable contrib libraries preferably from our Contrib package in the next step. - ## sudo yum boost-devel libsvm-devel libzip-devel zlib-devel xerces-c-devel bzip2-devel libhdf5-devel glpk-devel + ## sudo yum install boost-devel libsvm-devel libzip-devel zlib-devel xerces-c-devel bzip2-devel libhdf5-devel glpk-devel # NOTE that you might need to use cmake3 instead of cmake in the commands of the following steps. diff --git a/doc/doxygen/public/TOPPAS.doxygen b/doc/doxygen/public/TOPPAS.doxygen index 673596f41aa..88aad759d63 100644 --- a/doc/doxygen/public/TOPPAS.doxygen +++ b/doc/doxygen/public/TOPPAS.doxygen @@ -18,8 +18,8 @@ /** @page TOPPAS_tutorial TOPPAS tutorial - @b TOPPAS allows to create, edit, open, save, and run TOPP workflows. Pipelines - can be created conveniently in a GUI. The + The %OpenMS Pipeline Assistant (@ref TOPP_TOPPAS) allows to create, edit, open, save, and run TOPP workflows. Pipelines + can be created conveniently in the @ref TOPP_TOPPAS "TOPPAS GUI". The parameters of all involved tools can be edited within TOPPAS and are also saved as part of the pipeline definition in the @p TOPPAS file. @@ -31,14 +31,14 @@ @page TOPPAS_general General introduction @b TOPPAS allows you to create, edit, open, save, and run TOPP workflows. Pipelines - can be created conveniently in a GUI. The + can be created conveniently in the @ref TOPP_TOPPAS "TOPPAS GUI". The parameters of all involved tools can be edited within TOPPAS - and are also saved as part of the pipeline definition in the @p .toppas file. + and are also saved as part of the pipeline definition in the .toppas file. 
Furthermore, @b TOPPAS interactively performs validity checks during the pipeline editing process and before execution (i.e., a dry run of the entire pipeline), in order to prevent the creation of invalid workflows. Once set up and saved, a workflow can also be run without the GUI using - @p ExecutePipeline @p -in @p \. + @ref TOPP_ExecutePipeline. The following figure shows a simple example pipeline that has just been created and executed successfully: @@ -46,37 +46,53 @@ @image html TOPPAS_simple_example.png @image latex TOPPAS_simple_example.png "" width=14cm + @note Some example pipelines are shipped with %OpenMS and can be opened in TOPPAS by selecting @p File > @p Open @p example @p file. + Also see @subpage TOPPAS_examples for more information. + + + @page TOPPAS_interface User interface + + @section TOPPAS_interface_introduction Creating a pipeline from scratch + To create a new TOPPAS file, you can either: - open TOPPAS without providing any existing workflow - an empty workflow will be opened automatically - in a running TOPPAS program choose: @p File @p > @p New - - create an empty file in your file browser (explorer) with the suffix @p \.toppas and double-click it (on Windows systems all @p \.toppas files are associated with TOPPAS automatically during installation of %OpenMS, on Linux and MacOS you might need to manually associate the extension) - - @page TOPPAS_interface User interface - - @section TOPPAS_interface_introduction Introduction + - create an empty file in your file browser (Windows Explorer, MacOS Finder, Nautilus, etc) with the suffix @p \.toppas and double-click it (on Windows systems all @p \.toppas files are associated with TOPPAS automatically during installation of %OpenMS, on Linux and MacOS you might need to manually associate the extension) + + When you start TOPPAS, you will see the main window with a list of TOPP tools on the left side. 
- The following figure shows the @b TOPPAS main window and a pipeline which is just being created. + The following figure shows the @ref TOPP_TOPPAS main window and a pipeline which is just being created. The user has added some tools by drag&dropping them from the TOPP tool list on the left - onto the central window. Additionally, the user has added nodes - for input and output files. - - Next, the user has drawn some connections between the tools - which determine the data flow of the pipeline. Connections can be drawn by first @em deselecting the - desired source node (by left-clicking anywhere on the white background) - and then dragging (i.e. left-click and keep the button pressed) the mouse from the source to the target node (if - a @em selected node is dragged, it is moved on the canvas instead). + onto the central window (double clicking an item in the tool list also works). + Additionally, the user has added nodes for input and output files. + You can arrange the tools/nodes on the canvas freely by left-clicking them with the mouse, such that they become selected (bold) and then + dragging (i.e. left-click and keep the mouse button pressed) them to their desired position with the mouse. + + @note To find TOPP tools in the list, you can either scroll through the list or use the search bar at the top of the list. + The search bar will filter the list as you type, so you can quickly find the tool you are looking for. + + + @subsection TOPPAS_tut_edges On connections (=edges) + + Edges determine the data flow of the pipeline. Connections can be drawn by dragging (i.e. left-click and keep the mouse button pressed) + the mouse from the source to the target node. Before starting the drag, make sure that you de-select any node or edge by left-clicking + anywhere on the white canvas background. 
When a connection is created, and the source (, or the target) has more than one output (, or input) parameter, an input/output parameter mapping dialog shows up and lets the user select the output parameter of the source node and the - input parameter of the target node for this data flow - shown here for the connection between FalseDiscoveryRate and IDFilter. + input parameter of the target node for this data flow - shown above for the connection between @ref TOPP_FalseDiscoveryRate and @ref TOPP_IDFilter. If the file types of the selected input and output parameters are not compatible with each other, @b TOPPAS will refuse to add the connection. It will also refuse to add a connection if it would create a cycle in the workflow, or if it just would not make sense, e.g., if - its target is an input file node. The connection between the input file node (#1) and the CometAdapter (#5) - tool is painted yellow which indicates it is not ready yet, because no input files have been - specified. + an edge points to an input file node. + + An edge painted orange indicates that it is not ready yet, usually because no input files have been + specified.
    + A green edge indicates that the edge is ready to be executed.
    + A red edge indicates that the edge is not ready to be executed, + e.g., because the input files are not compatible with the tool's input requirements. @image html TOPPAS_edges.png @image latex TOPPAS_edges.png "" width=14cm @@ -86,25 +102,79 @@ All visible items (i.e. connections and the different kinds of nodes) have such a context menu. For a detailed list of the different menus and their entries, see @ref TOPPAS_interface_menus . - The following figure shows a possible next step: the user has double-clicked one of the tool nodes in order - to configure its parameters. By default, the standard parameters are used for each tool. Again, this can also + @subsection TOPPAS_tut_param Configuring tool parameters + + TOPP tools can be configured by double-clicking the tool node. + By default, the standard parameters are used for each tool. Again, this can also be done by selecting @p Edit @p parameters from the context menu of the tool. @image html TOPPAS_parameters.png @image latex TOPPAS_parameters.png "" width=14cm + @subsection TOPPAS_tut_input About input nodes + Once the pipeline has been set up, the input files have to be specified before the pipeline can be executed. This is done by double-clicking an input node and selecting the desired files in the dialog that appears. + You can also drag'n'drop files from your file manager into the dialog to add them to the list. - Input nodes have a special mode named @em "recycling mode", i.e., if the input node has fewer files than the following - node has rounds (as it might have two incoming connections) then the files are recycled until all rounds - are satisfied. This might be useful if one input node specifies a single database file (for a Search-Adapter like Mascot) - and another input node has the actual MS2 experiments (which is usually more than one). Then the database input - node would be set to "recycle" the database file, i.e. use it for every run of the MascotAdapter node. 
The input - list can be recycled an arbitrary number of times, but the recycling has to be @em complete, i.e. the number of rounds of the - downstream node have to be a multiple of the number of input files. Recycling mode can be activated by right-clicking - the input node and selecting the according entry from the context menu. + @subsection TOPPAS_tut_output_nodes About output nodes + + Output files from any TOPP tool in the pipeline can be stored permanently (i.e., after the pipeline has finished and TOPPAS is closed) by adding either of these nodes after any TOPP tool: + + - an `output files` node
    + This node can be connected to any tool that produces output files - either a single file or a list of files. + - an `output folder` node
    + This node can be connected to any tool that supports output folders (which is more rare than output files), e.g., the @ref TOPP_QualityControl tool. + + You should use these output nodes to store the results of any TOPP node you may need later on; typically the TOPP nodes which come last in the pipeline. + If you do not add output nodes, the results from TOPP nodes will be stored in the temporary folder and will be deleted when you close TOPPAS. + You can add multiple output nodes at different places in the pipeline to store intermediate results, if you feel you need them later on. + + See @ref TOPPAS_tut_output and @ref TOPPAS_tut_running for more information on output and temporary files. + + @subsection TOPPAS_tut_recycling On "Recycling" mode + + @image html TOPPAS_recycling.png + @image latex TOPPAS_recycling.png "" width=14cm + + Input nodes and all TOPP nodes have a special mode named @em "recycling mode". + Imagine a typical node, such as @ref TOPP_CometAdapter. Every time it runs, it consumes a single mzML file and a single FASTA file. + Thus, the node has two input edges, one for the mzML file and one for the FASTA file. + In a typical workflow, you have a bunch of mzML files, say five, in one `input files` node, but only one FASTA file in the other `input files` node. + CometAdapter will run five times. This is what we call a 'round', i.e. one invocation of the node. + If you want to run CometAdapter with the same FASTA file for all five mzML files, you can set the FASTA input node to "recycle" the FASTA file. + The alternative would be to have five identical FASTA files in the input node, which is not very elegant. + + The input from a recycled node can be used an arbitrary number of times, but the recycling has to be "complete", i.e. the number of rounds of the + downstream node (CometAdapter in our example) has to be a multiple of the number of input files. Typically, the number of items to be recycled is 'one' (e.g. 
one FASTA file), so this is usually not a problem. + + Recycling mode can be activated by right-clicking the input node and clicking the "Toggle recycling mode" entry from the context menu. + + See @ref TOPPAS_tut_edges for an example of a recycling input node. + @subsection TOPPAS_tut_specialnodes On special nodes (Merger and Collector) + + Sometimes, it is necessary to merge or collect files from different input nodes. + This is where the @em Merger and @em Collector nodes come into play. + + As its name suggests, a @p merger merges its incoming file lists, i.e., + files of all incoming edges are appended into new lists (which + have as many elements as the merger has incoming connections). All tools this merger has outgoing + connections to are called with these merged lists as input files. All incoming connections should + pass the same number of files (unless the corresponding preceding tool is in recycling mode). + For example, if a merger has three incoming connections, it will pass on a list of three files to the next tool. + This will happen as often as each incoming connection has files. + + A @p collector node, on the other hand, waits for all rounds to finish before concatenating all files from all + incoming connections into one single list. It then calls the next tool with this list of files as input. + This will happen exactly once during the entire pipeline run. Typically, a collector node is used to collect + all files from a FeatureFinder node (which is invoked many times, once for each raw file) and pass the list of + resulting featureXML files to a MapAligner tool (which runs only once, on all featureXML files simultaneously). + + There is also a @p splitter node, which is the opposite of a collector, but it should be required only in very rare cases. 
+ + @subsection TOPPAS_tut_running Running the pipeline + Finally, if you have input and output nodes at every end of your pipeline and all connections are green, you can select @p Pipeline @p > @p Run in the menu bar or just press @p F5. @@ -113,32 +183,45 @@ You will be asked for an output file directory where a sub-directory, @p TOPPAS_out, will be created. This directory will contain your output files. - You can also specify the number of jobs (i.e. TOPP tool invocations) that TOPPAS is allowed to run in parallel. If a number greater than 1 + Also, you can specify the number of jobs (i.e. TOPP tool invocations) that TOPPAS is allowed to run in parallel (see @ref TOPPAS_tut_parallel below for details). + + During pipeline execution, the status lights in the top-right corner of the + tools indicate if the tool has finished successfully (green), is currently running (yellow), + has not done anything so far (gray), + is scheduled to run next (blue), or has crashed (red). + The numbers in the bottom-right corner of every tool show how many files have already been processed and + the overall number of files to be processed by this tool. + When the execution has finished, you can check the generated output files of every node quickly by right-clicking on the node and + selecting @p Open @p files @p in @p TOPPView or @p Open @p containing @p folder from the context menu. + + + @subsection TOPPAS_tut_output Output and temporary files + + In addition to @p TOPPAS_out, which holds all files captured in `output files` and `output folder` node of the pipeline, a @p TOPPAS_tmp directory will be created in the %OpenMS temp path + (call the @ref TOPP_OpenMSInfo tool to see where exactly). + The @p TOPPAS_tmp will contain all temporary files that are passed from tool to tool within the pipeline. + Both folders contain further sub-directories which are named after the number in the top-left corner of the node they + belong to (plus the name of the tool for temporary files). 
+ + @note Files in the @p TOPPAS_out directory are not automatically deleted after the pipeline execution. These are your results! You have to delete them manually if you don't need them anymore. Files in the @p TOPPAS_tmp directory are deleted automatically upon closing the pipeline or the TOPPAS GUI. + + @subsection TOPPAS_tut_parallel On parallel execution + + You can specify the number of jobs (i.e. TOPP tool invocations) that TOPPAS is allowed to run in parallel in the + "Run dialog" (after pressing F5). If a number greater than 1 is selected, TOPPAS will parallelize the pipeline execution in the following scenarios: - - A tool has to process more than one input file, but can only handle one file at a time (as is the case for most TOPP tools; notable exceptions are MapAligners and FeatureLinkers). In this case, multiple instances of the tool are started in parallel. + - A tool has to process more than one input file, but can only handle one file at a time (as is the case for most TOPP tools; notable exceptions are MapAligners and FeatureLinkers). In this case, multiple instances of the same tool are run in parallel. - The pipeline contains multiple branches that are independent of each other. In this case, nodes in independent branches are run in parallel. - Be careful with this setting, however, as some of the TOPP tools require large amounts of RAM (depending - on the size of your dataset). Running too many parallel jobs on a machine with not enough memory will cause problems. + Be careful with this setting, however, as some of the TOPP tools require larger amounts of RAM (depending + on the size of your dataset). Running too many parallel jobs on a machine with not enough memory may cause problems. 
Also, do not confuse this setting with the @em threads parameter of the individual TOPP tools: every TOPP tool has this parameter specifying the maximum number of threads the tool is allowed to use (although only a subset of the TOPP tools make use of this parameter, since there are tasks that cannot be computed in parallel). Be especially careful with combinations - of both parameters! If you have a pipeline containing the @em FeatureFinderCentroided, for example, and set its @em threads parameter - to 8, and you additionally set the number of parallel jobs in @b TOPPAS to 8, then you may end up using 8*8=64 threads in parallel (if you have 8 or more input files), which + of both parameters! If you have a pipeline containing the @em FeatureFinderCentroided, for example, and its @em threads parameter + is set to 8, and you additionally set the number of parallel jobs in @b TOPPAS to 8, then you may end up using 8*8=64 threads in parallel (if you have 8 or more input files), which might not be what you intended to do. - - In addition to @p TOPPAS_out, a @p TOPPAS_tmp directory will be created in the %OpenMS temp path - (call the @em OpenMSInfo tool to see where exactly). - It will contain all temporary files that are passed from tool to tool within the pipeline. - Both folders contain further sub-directories which are named after the number in the top-left corner of the node they - belong to (plus the name of the tool for temporary files). During pipeline execution, the status lights in the top-right corner of the - tools indicate if the tool has finished successfully (green), is currently running (yellow), - has not done anything so far (gray), is scheduled to run next (blue), or has crashed (red). - The numbers in the bottom-right corner of every tool show how many files have already been processed and - the overall number of files to be processed by this tool. 
- When the execution has finished, you can check the generated output files of every node quickly by selecting - @p Open @p files @p in @p TOPPView or @p Open @p containing @p folder from the context menu (right click on the node). @section TOPPAS_interface_mk Mouse and keyboard @@ -173,9 +256,9 @@ The edge will be colored as dark magenta to indicate parameter copying. - @section TOPPAS_interface_menus Menus + @section TOPPAS_interface_menus TOPPAS Menus - @b Menu @b bar: + @b Main @b Menu @b bar: @n @n In the @p File menu, you can @@ -184,10 +267,9 @@ - open an existing one (@p Open) - open an example file (@p Open @p example @p file) - include an existing workflow to the current workflow (@p Include) - - visit the online workflow repository (@p Online @p repository) - save a workflow (@p Save / @p Save @p as) - export the workflow as image (@p Export @p as @p image) - - refresh the parameter definitions of all tools contained in the workflow (@p Refresh @p parameters) + - refresh the parameter definitions of all tools contained in the workflow. 
This is useful to make old pipelines run on the latest OpenMS/TOPPAS versions (@p Refresh @p parameters) - close the current window (@p Close) - load and save TOPPAS resource files (.trf) (@p Load / @p Save @p TOPPAS @p resource @p file) @@ -216,7 +298,7 @@ - specify the input files - open the specified files in TOPPView - - open the input files' folder in the window manager (explorer) + - open the input files' folder in the file manager (Windows Explorer, MacOS Finder etc) - toggle the "recycling" mode - copy, cut, and remove the node @@ -226,7 +308,7 @@ - configure the parameters of the tool - resume the pipeline at this node - open its temporary output files in TOPPView - - open the temporary output folder in the file manager (explorer) + - open the temporary output folder in the file manager (Windows Explorer, MacOS Finder etc) - toggle the "recycling" mode - copy, cut, and remove the node @@ -246,17 +328,21 @@ @page TOPPAS_examples Examples - The following sections explain the example pipelines TOPPAS comes with. You can - open them by selecting @p File > @p Open @p example @p file. All input files and - parameters are already specified, so you can just hit @p Pipeline > @p Run (or press + The following sections explain the example pipelines TOPPAS comes with. + + You can @em open all example pipelines by selecting @p File > @p Open @p example @p file in TOPPAS. + + All input files and parameters are already specified, so you can just hit @p Pipeline > @p Run (or press @p F5) and see what happens. @section TOPPAS_peak_picking_example Profile data processing - The file @p peakpicker_tutorial.toppas contains a simple pipeline representing a + The file @p peakpicker_tutorial.toppas can be inspected in TOPPAS via `File -> Open Example File`. + It contains a simple pipeline representing a common use case: starting with profile data, the noise is eliminated and the baseline is subtracted. 
Then, PeakPickerHiRes is used to find all peaks in the noise-filtered and baseline-reduced profile data. + @image html TOPPAS_example_profile_data_processing.png @image latex TOPPAS_example_profile_data_processing.png "" width=14cm @@ -264,9 +350,9 @@ @section TOPPAS_id_example Identification of E. coli peptides This section describes an example identification pipeline contained in the - example directory, @p Ecoli_Identification.toppas. It is shipped together + example directory, @p Ecoli_Identification.toppas. Inspect it in TOPPAS via `File -> Open Example File`. It is shipped together with a reduced example mzML file containing 139 MS2 spectra from an E. coli - run on an Orbitrap instrument as well as an E. coli target-decoy database. + run on an Orbitrap instrument as well as an E. coli target-decoy database (which was created using @ref TOPP_DecoyDatabase). We use the search engine Comet (Eng et al., 2012) for peptide identification. Therefore, @@ -284,29 +370,30 @@ @image latex TOPPAS_Ecoli_Identification.png "" width=12cm Extensions to this pipeline would be to do the annotation of the spectra with - multiple search engines and combine the results afterwards, using the ConsensusID + multiple search engines and combine the results afterwards, using the @ref TOPP_ConsensusID TOPP tool. - The results may be exported using the TextExporter tool, for further downstream analysis with + The results may be exported using the @ref TOPP_TextExporter tool, for further downstream analysis with non-OpenMS tools. @section TOPPAS_quant_example Quantitation of BSA runs The simple pipeline described in this section (@p BSA_Quantitation.toppas) can be used to quantify peptides - that occur on different runs. The example dataset contains three different bovine serum albumin (BSA) runs. - First, FeatureFinderCentroided is called since the dataset is centroided. The + that occur on different runs (you can inspect it in TOPPAS via `File -> Open Example File`). 
+ The example dataset contains three different bovine serum albumin (BSA) runs. + First, @ref TOPP_FeatureFinderCentroided is called since the dataset is already centroided (i.e. peak picking took place already). The results of the feature finding are then annotated with (existing) identification results. For convenience, we provide these search results (as idXML files) with an FDR of 5% in the BSA directory. @image html TOPPAS_BSA_Quantitation.png @image latex TOPPAS_BSA_Quantitation.png "" width=8cm - Identifications are mapped to features by the IDMapper. The last step - is performed by FeatureLinkerUnlabeled which links corresponding features. The results can be + Identifications are mapped to features (3D quantitation points of peptide signals) by the @ref TOPP_IDMapper. The last step + is performed by @ref TOPP_FeatureLinkerUnlabeled which links corresponding features across runs. The results can be used to calculate ratios, for example. The data could also be exported to a text based - format using the TextExporter for further processing (e.g., in Microsoft Excel). + format using the @ref TOPP_TextExporter for further processing (e.g., in Microsoft Excel). - The results can be opened in TOPPView. The next figures show the results in 2D + The results can be opened in @ref TOPP_TOPPView. The next figures show the results in 2D and 3D view, together with the feature intermediate results. One can see that the intensities and retention times are slightly different between the runs. 
To correct for retention times shift, a map alignment could be done, @@ -318,22 +405,52 @@ @image html TOPPAS_BSA_results_3d.png @image latex TOPPAS_BSA_results_3d.png "" width=10cm + + + @section TOPPAS_qc_example Quality control of a DDA run + + @image html TOPPAS_qc.png + @image latex TOPPAS_qc.png "" width=10cm + + A quality control pipeline for DDA data (@p QualityControl.toppas), which also performs identification (using @ref TOPP_MSGFPlusAdapter), + mass calibration (using @ref TOPP_InternalCalibration) and quantitation (using @ref TOPP_FeatureFinderIdentification). + + Since we analyse more than one raw file (run), we can also employ MapAlignment and FeatureLinking, + to transfer identifications and enable a quantitative comparison across samples. + + Finally, the @ref TOPP_QualityControl node receives all intermediate and final output data to extract and compute quality metrics. + These are exported in the form of several output files which can be inspected. + + Most important is probably the output folder (node #25), which contains MaxQuant compatible output (@p evidence.txt and @p msms.txt). + These .txt files can be used as input to external tools, such as PTX-QC to automatically obtain a comprehensive QC report. + + @section TOPPAS_subsetneighborsearch_example Subset Neighbor Search + + We will use a special FDR search strategy described by Lin et al. + It uses a mode of @ref TOPP_DecoyDatabase which allows creating a special "neighbor" database to control the FDR. + + The example pipeline is named @p FDR_NeighborSearch.toppas. Inspect it in TOPPAS via `File -> Open Example File`. A description is provided within the TOPPAS workflow. + @section TOPPAS_merger_example Merger and Collect nodes The following example is actually not a useful workflow but is supposed to demonstrate how merger and collector nodes can be used in a pipeline. Have a look at - @p merger_tutorial.toppas: + @p merger_tutorial.toppas. 
Inspect it in TOPPAS via `File -> Open Example File`. @image html TOPPAS_example_merger.png @image latex TOPPAS_example_merger.png "" width=14cm - As its name suggests, a merger merges its incoming file lists, i.e., - files of all incoming edges are appended into new lists (which - have as many elements as the merger has incoming connections). All tools this merger has outgoing - connections to are called with these merged lists as input files. All incoming connections should - pass the same number of files (unless the corresponding preceding tool is in recycling mode). + In short: mergers require multiple input edges, whose data is combined bit by bit. + Collectors on the other hand usually only have one input edge and combine all files from this single edge into a list in one go. The succeeding tool will be invoked only once, with a list of input files. + + In detail, a @em merger merges its incoming file lists, i.e., + files of all incoming edges are combined into new lists. Each file list has as many elements as the merger has incoming connections. + And there are as many lists as there are files (rounds) from the preceding (=upstream) tool. The tool downstream of the merger is invoked with + these merged lists as input files. + All incoming connections should pass the same number of files (unless one of the upstream nodes is in "recycling mode"), such that all merged lists have the same number of files. + In other words, if you have K input edges with N files each, the merger will create N output lists, with K elements each. - A collector node, on the other hand, waits for all rounds to finish before concatenating all files from all + A collector node, on the other hand, waits for all rounds to finish on its upstream side before concatenating all files from all incoming connections into one single list. It then calls the next tool with this list of files as input. This will happen exactly once during the entire pipeline run. 
diff --git a/share/OpenMS/CHEMISTRY/Enzymes.xml b/share/OpenMS/CHEMISTRY/Enzymes.xml index 99a37f77390..cbb50a8e6c7 100755 --- a/share/OpenMS/CHEMISTRY/Enzymes.xml +++ b/share/OpenMS/CHEMISTRY/Enzymes.xml @@ -150,6 +150,7 @@ + @@ -210,6 +211,7 @@ + diff --git a/share/OpenMS/CHEMISTRY/custom_mods.xml b/share/OpenMS/CHEMISTRY/custom_mods.xml new file mode 100644 index 00000000000..b5dac0748ae --- /dev/null +++ b/share/OpenMS/CHEMISTRY/custom_mods.xml @@ -0,0 +1,473 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Research Mansucript + Misc. URL + https://pubs.acs.org/doi/full/10.1021/acs.jproteome.1c00827 + + Phospho Decoy can be used to compute FLR in phospho proteomics experiments, the method is called pAla + + + + \ No newline at end of file diff --git a/share/OpenMS/CHEMISTRY/unimod.xml b/share/OpenMS/CHEMISTRY/unimod.xml index 4c3f85c7553..27c7c09e2c1 100644 --- a/share/OpenMS/CHEMISTRY/unimod.xml +++ b/share/OpenMS/CHEMISTRY/unimod.xml @@ -1151,16 +1151,16 @@ - diff --git a/share/OpenMS/examples/TOPPAS/peakpicker_tutorial.toppas b/share/OpenMS/examples/TOPPAS/peakpicker_tutorial.toppas index d73d844f081..8b2521fe362 100644 --- a/share/OpenMS/examples/TOPPAS/peakpicker_tutorial.toppas +++ b/share/OpenMS/examples/TOPPAS/peakpicker_tutorial.toppas @@ -1,7 +1,7 @@ - + +<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#000000;">Depending on your data, either of the two filters may be removed or reconfigured with other parameters.</span></p> +<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#000000;">As a last step, PeakPickerHiRes will convert profile peaks into centroided peaks and store the result in Node #5 as mzML file.</span></p></body></html>]]>" type="string" description="" required="false" advanced="false" /> - + + + - - - - - + + + @@ 
-46,12 +48,12 @@ p, li { white-space: pre-wrap; } - - - + + + @@ -66,12 +68,12 @@ p, li { white-space: pre-wrap; } - - - + + + @@ -106,10 +108,10 @@ p, li { white-space: pre-wrap; } - + diff --git a/src/openms/CMakeLists.txt b/src/openms/CMakeLists.txt index cfce1da42ac..1810d13a8ef 100644 --- a/src/openms/CMakeLists.txt +++ b/src/openms/CMakeLists.txt @@ -70,11 +70,6 @@ add_subdirectory(extern) #------------------------------------------------------------------------------ #include(${PROJECT_SOURCE_DIR}/cmake_findExternalLibs.cmake) -#------------------------------------------------------------------------------ -# At this point make a summary of where data and doc will be located: -message(STATUS "Info: CF_OPENMS_DATA_PATH: ${CF_OPENMS_DATA_PATH}") -message(STATUS "Info: CF_OPENMS_DOC_PATH: ${CF_OPENMS_DOC_PATH}") - #------------------------------------------------------------------------------ # configure config.h #------------------------------------------------------------------------------ diff --git a/src/openms/configh.cmake b/src/openms/configh.cmake index 96e07e2221c..f57be1a4cbf 100755 --- a/src/openms/configh.cmake +++ b/src/openms/configh.cmake @@ -39,6 +39,12 @@ set(CF_OPENMS_DOC_PATH ${OPENMS_HOST_DIRECTORY}/doc CACHE INTERNAL "Path to the set(CF_OPENMS_INSTALL_DATA_PATH ${CMAKE_INSTALL_PREFIX}/${INSTALL_SHARE_DIR} CACHE INTERNAL "Path to the installed shared documents of OpenMS.") set(CF_OPENMS_INSTALL_DOC_PATH ${CMAKE_INSTALL_PREFIX}/${INSTALL_DOC_DIR} CACHE INTERNAL "Path to the installed documentation of OpenMS." 
) +#------------------------------------------------------------------------------ +# At this point make a summary of where data and doc will be located: +message(STATUS "Info: CF_OPENMS_DATA_PATH: ${CF_OPENMS_DATA_PATH}") +message(STATUS "Info: CF_OPENMS_DOC_PATH: ${CF_OPENMS_DOC_PATH}") + + ## check for Microsoft Visual Studio compiler if (MSVC) set(OPENMS_COMPILER_MSVC "1" CACHE INTERNAL "Do we use Microsoft Compiler?") diff --git a/src/openms/include/OpenMS/ANALYSIS/ID/AScore.h b/src/openms/include/OpenMS/ANALYSIS/ID/AScore.h index 920403363d4..cb6267d065e 100644 --- a/src/openms/include/OpenMS/ANALYSIS/ID/AScore.h +++ b/src/openms/include/OpenMS/ANALYSIS/ID/AScore.h @@ -124,7 +124,7 @@ namespace OpenMS void computeSiteDeterminingIons_(const std::vector& th_spectra, const ProbablePhosphoSites& candidates, std::vector& site_determining_ions) const; /// return all phospho sites - std::vector getSites_(const AASequence& without_phospho) const; + std::vector getSites_(const String& unmodified_sequence) const; /// calculate all n_phosphorylation_events sized sets of phospho sites (all versions of the peptides with exactly n_phosphorylation_events) std::vector> computePermutations_(const std::vector& sites, Int n_phosphorylation_events) const; diff --git a/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h b/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h new file mode 100644 index 00000000000..16747c697b2 --- /dev/null +++ b/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h @@ -0,0 +1,161 @@ +// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Chris Bielow, Philipp Wang $ +// $Authors: Chris Bielow, Philipp Wang $ +// -------------------------------------------------------------------------- + +#pragma once + +#include +#include + +#include +#include + + + +namespace OpenMS +{ + 
/** + @brief The Neighbor Peptide functionality is designed to find peptides (neighbors) in a given set of sequences (FASTA file) that are + similar to a target peptide (aka relevant peptide) based on mass and spectral characteristics. This provides more power + when searching complex samples, when only a subset of the peptides/proteins is of interest. + + The paper on subset neighbor search is www.ncbi.nlm.nih.gov/pmc/articles/PMC8489664/ + DOI: 10.1021/acs.jproteome.1c00483 + */ + class OPENMS_DLLAPI NeighborSeq + { + + public: + /// Constructor + /// @param digested_relevant_peptides A vector of digested relevant peptides + NeighborSeq(std::vector&& digested_relevant_peptides); + + /** + * @brief Generates a theoretical spectrum for a given peptide sequence with b/y ions at charge 1. + * + * Includes all b and y ions with charge 1 (even the prefix ions, e.g. b1), but no internal ions. + * + * @param peptide_sequence The peptide sequence for which to generate the spectrum. + * @return The generated theoretical spectrum. + */ + MSSpectrum generateSpectrum(const AASequence& peptide_sequence); + + /** + * @brief Compares two spectra to determine if they share a sufficient number of ions. + * + * All peaks are considered. Use generateSpectrum() to generate theoretical spectra with b/y ions. + * + * @param spec1 The first theoretical spectrum. + * @param spec2 The second theoretical spectrum. + * @param min_shared_ion_fraction The minimal required proportion of shared ions in [0, 1] + * @param mz_bin_size Bin size for the m/z values, which determines if two peaks are considered to be the same (typically, 0.05 for high resolution and 1.0005079 for low resolution). + * @return True if the spectra share a sufficient number of ions, false otherwise. 
+ */ + static bool isNeighborSpectrum(const MSSpectrum& spec1, const MSSpectrum& spec2, const double min_shared_ion_fraction, const double mz_bin_size); + /** + * @brief Compute the number of shared ions between two spectra + * + * All peaks are considered. Use generateSpectrum() to generate theoretical spectra with b/y ions. + * + * @param spec1 The first theoretical spectrum. + * @param spec2 The second theoretical spectrum. + * @param mz_bin_size Bin size for the m/z values, which determines if two peaks are considered to be the same. + * @return The number of shared ions + */ + static int computeSharedIonCount(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& mz_bin_size); + + /** + * @brief Is this peptide a neighbor to one of the relevant peptides? + * + * Also updates the internal statistics, which can be retrieved using getNeighborStats(). + * + * @param neighbor_candidate The peptide sequence (from a neighbor protein) to compare against the internal relevant peptides (see constructor). + * @param mass_tolerance_pc Maximal precursor mass difference (in Da or ppm; see 'mass_tolerance_pc_ppm') between neighbor and relevant peptide. + * @param mass_tolerance_pc_ppm Is 'mass_tolerance_pc' in Da or ppm? + * @param min_shared_ion_fraction The ion tolerance for neighbor peptides. + * @param mz_bin_size Bin size for spectra m/z comparison (the original study suggests 0.05 Th for high-res and 1.0005079 Th for low-res spectra). + * @return true if @p neighbor_candidate is neighbor to one or more relevant peptides, false otherwise. 
+ */ + bool isNeighborPeptide(const AASequence& neighbor_candidate, + const double mass_tolerance_pc, + const bool mass_tolerance_pc_ppm, + const double min_shared_ion_fraction, + const double mz_bin_size); + + /// Statistics of how many neighbors were found per reference peptide + struct NeighborStats + { + /** @name NeigborStats_members + * Mutually exclusive categories of how many neighbors were found per reference peptide + */ + ///@{ + int unfindable_peptides = 0; ///< how many ref-peptides contain an 'X' (unknown amino acid) and thus cannot be searched for neighbors + int findable_no_neighbors = 0; ///< how many peptides had no neighbors? + int findable_one_neighbor = 0; ///< how many peptides had exactly one neighbor? + int findable_multiple_neighbors = 0; ///< how many peptides had multiple neighbors? + ///@} + + /// Sum of all 4 categories + int total() const + { + return unfindable_peptides + findable_no_neighbors + findable_one_neighbor + findable_multiple_neighbors; + } + /// Number of reference peptides that contain an 'X' (unknown amino acid), formatted as 'X (Y%)' + String unfindable() const + { + return String(unfindable_peptides) + " (" + unfindable_peptides * 100 / total() + "%)"; + } + + /// Number of reference peptides that had no neighbors, formatted as 'X (Y%)' + String noNB() const + { + return String(findable_no_neighbors) + " (" + findable_no_neighbors * 100 / total() + "%)"; + } + /// Number of reference peptides that had exactly one neighbor, formatted as 'X (Y%)' + String oneNB() const + { + return String(findable_one_neighbor) + " (" + findable_one_neighbor * 100 / total() + "%)"; + } + /// Number of reference peptides that had multiple neighbors, formatted as 'X (Y%)' + String multiNB() const + { + return String(findable_multiple_neighbors) + " (" + findable_multiple_neighbors * 100 / total() + "%)"; + } + }; + + /// after calling isNeighborPeptide() multiple times, this function returns the statistics of how many neighbors were found 
per reference peptide + NeighborStats getNeighborStats() const; + + protected: + /** + * @brief Creates a map of masses to positions from the internal relevant peptides. + * @return A map where the key is the mass and the value is a vector of positions. + */ + std::map> createMassLookup_(); + + /** + * @brief Finds candidate positions based on a given mono-isotopic weight and mass tolerance. + * @param mono_weight The mono-isotopic weight to find candidates for. + * @param mass_tolerance The allowed tolerance for matching the mass. + * @param mass_tolerance_pc_ppm Whether the mass tolerance is in ppm. + * @return A pair of begin/end iterators into mass_position_map_ for the candidate positions + */ + auto findCandidatePositions_(const double mono_weight, double mass_tolerance, const bool mass_tolerance_pc_ppm); + + + private: + const std::vector& digested_relevant_peptides_; ///< digested relevant peptides + std::map> mass_position_map_; ///< map of masses to positions in digested_relevant_peptides_ + + TheoreticalSpectrumGenerator spec_gen_; ///< for b/y ions with charge 1 + const Residue* x_residue_; ///< residue for unknown amino acid + + std::vector neighbor_stats_; ///< how many neighbors per reference peptide searched using isNeighborPeptide()? 
+ + }; // class NeighborSeq + +} // namespace OpenMS diff --git a/src/openms/include/OpenMS/ANALYSIS/ID/sources.cmake b/src/openms/include/OpenMS/ANALYSIS/ID/sources.cmake index 9484ece68a7..823a3cfb83d 100644 --- a/src/openms/include/OpenMS/ANALYSIS/ID/sources.cmake +++ b/src/openms/include/OpenMS/ANALYSIS/ID/sources.cmake @@ -34,6 +34,7 @@ IonIdentityMolecularNetworking.h MessagePasserFactory.h MetaboliteSpectralMatching.h MorpheusScore.h +NeighborSeq.h PeptideIndexing.h PeptideProteinResolution.h PercolatorFeatureSetHelper.h diff --git a/src/openms/include/OpenMS/APPLICATIONS/ParameterInformation.h b/src/openms/include/OpenMS/APPLICATIONS/ParameterInformation.h index 7e8d9192fa2..d82f0ed8ae7 100644 --- a/src/openms/include/OpenMS/APPLICATIONS/ParameterInformation.h +++ b/src/openms/include/OpenMS/APPLICATIONS/ParameterInformation.h @@ -28,6 +28,7 @@ namespace OpenMS INPUT_FILE, ///< String parameter that denotes an input file OUTPUT_FILE, ///< String parameter that denotes an output file OUTPUT_PREFIX, ///< String parameter that denotes an output file prefix + OUTPUT_DIR, ///< String parameter that denotes an output directory DOUBLE, ///< Floating point number parameter INT, ///< Integer parameter STRINGLIST, ///< More than one String Parameter diff --git a/src/openms/include/OpenMS/APPLICATIONS/TOPPBase.h b/src/openms/include/OpenMS/APPLICATIONS/TOPPBase.h index 051191fb17f..9f91501096a 100644 --- a/src/openms/include/OpenMS/APPLICATIONS/TOPPBase.h +++ b/src/openms/include/OpenMS/APPLICATIONS/TOPPBase.h @@ -121,6 +121,12 @@ namespace OpenMS class OPENMS_DLLAPI TOPPBase { public: + inline static const char* TAG_OUTPUT_FILE = "output file"; + inline static const char* TAG_INPUT_FILE = "input file"; + inline static const char* TAG_OUTPUT_DIR = "output dir"; + inline static const char* TAG_OUTPUT_PREFIX = "output prefix"; + inline static const char* TAG_ADVANCED = "advanced"; + inline static const char* TAG_REQUIRED = "required"; /// Exit codes enum ExitCodes @@ 
-188,6 +194,9 @@ namespace OpenMS /// Returns a link to the documentation of the tool (accessible on our servers and only after inclusion in the nightly branch or a release). String getDocumentationURL() const; + /// The latest and greatest OpenMS citation + static const Citation cite_openms; + private: /// Tool name. This is assigned once and for all in the constructor. String const tool_name_; @@ -443,7 +452,7 @@ namespace OpenMS @param default_value Default argument @param description Description of the parameter. Indentation of newline is done automatically. @param required If the user has to provide a value i.e. if the value has to differ from the default (checked in get-method) - @param advanced If @em true, this parameter is advanced and by default hidden in the GUI. + @param advanced If @em true, this parameter is advanced and by default hidden in the GUI and during --help. */ void registerStringOption_(const String& name, const String& argument, const String& default_value, const String& description, bool required = true, bool advanced = false); @@ -478,7 +487,7 @@ namespace OpenMS @param default_value Default argument @param description Description of the parameter. Indentation of newline is done automatically. @param required If the user has to provide a value i.e. if the value has to differ from the default (verified in getStringOption()) - @param advanced If @em true, this parameter is advanced and by default hidden in the GUI. + @param advanced If @em true, this parameter is advanced and by default hidden in the GUI and during --help. @param tags A list of tags, extending/omitting automated checks on the input file (e.g. when its an executable) Valid tags: @em 'skipexists' - will prevent checking if the given file really exists (useful for partial paths, e.g. in OpenMS/share/... 
which will be resolved by the TOPP tool internally) @em 'is_executable' - checks existence of the file first using its actual value, and upon failure also using the PATH environment (and common exe file endings on Windows, e.g. .exe and .bat). @@ -496,7 +505,7 @@ namespace OpenMS @param default_value Default argument @param description Description of the parameter. Indentation of newline is done automatically. @param required If the user has to provide a value i.e. if the value has to differ from the default (checked in get-method) - @param advanced If @em true, this parameter is advanced and by default hidden in the GUI. + @param advanced If @em true, this parameter is advanced and by default hidden in the GUI and during --help. */ void registerOutputFile_(const String& name, const String& argument, const String& default_value, const String& description, bool required = true, bool advanced = false); @@ -518,10 +527,25 @@ namespace OpenMS @param default_value Default value (remember, no extension is specified here) @param description Description of the parameter. Indentation of newline is done automatically. @param required If the user has to provide a value i.e. if the value has to differ from the default (checked in get-method) - @param advanced If @em true, this parameter is advanced and by default hidden in the GUI. + @param advanced If @em true, this parameter is advanced and by default hidden in the GUI and during --help. */ void registerOutputPrefix_(const String& name, const String& argument, const String& default_value, const String& description, bool required = true, bool advanced = false); + /** + @brief Registers an output directory used for tools with multiple output files which are not an output file list, i.e. do not correspond to the number of input files. + + @note Setting format(s) via setValidFormat_ for an output directory is not possible as directories do not have a file extension. 
+ + @param name Name of the option in the command line and the INI file + @param argument Argument description text for the help output + @param default_value Default value + @param description Description of the parameter. Indentation of newline is done automatically. + @param required If the user has to provide a value i.e. if the value has to differ from the default (checked in get-method) + @param advanced If @em true, this parameter is advanced and by default hidden in the GUI and during --help. + */ + void registerOutputDir_(const String& name, const String& argument, const String& default_value, const String& description, bool required = true, bool advanced = false); + + /** @brief Sets the formats for a input/output file option or for all members of an input/output file lists @@ -545,7 +569,7 @@ namespace OpenMS @param default_value Default argument @param description Description of the parameter. Indentation of newline is done automatically. @param required If the user has to provide a value i.e. if the value has to differ from the default (checked in get-method) - @param advanced If @em true, this parameter is advanced and by default hidden in the GUI. + @param advanced If @em true, this parameter is advanced and by default hidden in the GUI and during --help. */ void registerDoubleOption_(const String& name, const String& argument, double default_value, const String& description, bool required = true, bool advanced = false); @@ -582,7 +606,7 @@ namespace OpenMS @param default_value Default argument @param description Description of the parameter. Indentation of newline is done automatically. @param required If the user has to provide a value i.e. if the value has to differ from the default (checked in get-method) - @param advanced If @em true, this parameter is advanced and by default hidden in the GUI. + @param advanced If @em true, this parameter is advanced and by default hidden in the GUI and during --help. 
*/ void registerIntOption_(const String& name, const String& argument, Int default_value, const String& description, @@ -596,7 +620,7 @@ namespace OpenMS @param default_value Default argument @param description Description of the parameter. Indentation of newline is done automatically. @param required If the user has to provide a value i.e. if the value has to differ from the default (checked in get-method) - @param advanced If @em true, this parameter is advanced and by default hidden in the GUI. + @param advanced If @em true, this parameter is advanced and by default hidden in the GUI and during --help. */ void registerIntList_(const String& name, const String& argument, const IntList& default_value, const String& description, bool required = true, bool advanced = false); @@ -609,7 +633,7 @@ namespace OpenMS @param default_value Default argument @param description Description of the parameter. Indentation of newline is done automatically. @param required If the user has to provide a value i.e. if the value has to differ from the default (checked in get-method) - @param advanced If @em true, this parameter is advanced and by default hidden in the GUI. + @param advanced If @em true, this parameter is advanced and by default hidden in the GUI and during --help. */ void registerDoubleList_(const String& name, const String& argument, const DoubleList& default_value, const String& description, bool required = true, bool advanced = false); @@ -621,7 +645,7 @@ namespace OpenMS @param default_value Default argument @param description Description of the parameter. Indentation of newline is done automatically. @param required If the user has to provide a value i.e. if the value has to differ from the default (checked in get-method) - @param advanced If @em true, this parameter is advanced and by default hidden in the GUI. + @param advanced If @em true, this parameter is advanced and by default hidden in the GUI and during --help. 
*/ void registerStringList_(const String& name, const String& argument, const StringList& default_value, const String& description, bool required = true, bool advanced = false); @@ -636,7 +660,7 @@ namespace OpenMS @param default_value Default argument @param description Description of the parameter. Indentation of newline is done automatically. @param required If the user has to provide a value i.e. if the value has to differ from the default (checked in get-method) - @param advanced If @em true, this parameter is advanced and by default hidden in the GUI. + @param advanced If @em true, this parameter is advanced and by default hidden in the GUI and during --help. @param tags A list of tags, extending/omitting automated checks on the input file (e.g. when its an executable) Valid tags: 'skipexists' - will prevent checking if the given file really exists (useful for partial paths, e.g. in OpenMS/share/... which will be resolved by the TOPP tool internally) 'is_executable' - checks existence of the file using the PATH environment (and common exe file endings on Windows, e.g. .exe and .bat). @@ -654,7 +678,7 @@ namespace OpenMS @param default_value Default argument @param description Description of the parameter. Indentation of newline is done automatically. @param required If the user has to provide a value i.e. if the value has to differ from the default (checked in get-method) - @param advanced If @em true, this parameter is advanced and by default hidden in the GUI. + @param advanced If @em true, this parameter is advanced and by default hidden in the GUI and during --help. 
*/ void registerOutputFileList_(const String& name, const String& argument, const StringList& default_value, const String& description, bool required = true, bool advanced = false); @@ -687,7 +711,7 @@ namespace OpenMS /** - @brief Returns the value of a previously registered string option + @brief Returns the value of a previously registered string option (use `getOutputDirOption()` for output directories) @exception Exception::UnregisteredParameter is thrown if the parameter was not registered @exception Exception::RequiredParameterNotGiven is if a required parameter is not present @@ -695,6 +719,16 @@ namespace OpenMS @exception Exception::InvalidParameter is thrown if the parameter restrictions are not met */ String getStringOption_(const String& name) const; + + /** + @brief Returns the value of a previously registered output_dir option + + @exception Exception::UnregisteredParameter is thrown if the parameter was not registered + @exception Exception::RequiredParameterNotGiven is if a required parameter is not present + @exception Exception::WrongParameterType is thrown if the parameter has the wrong type + @exception Exception::InvalidParameter is thrown if the parameter restrictions are not met + */ + String getOutputDirOption(const String& name) const; /** @brief Returns the value of a previously registered double option @@ -960,9 +994,6 @@ namespace OpenMS /// .TOPP.ini file for storing system default parameters static String topp_ini_file_; - /// The OpenMS citation - static const Citation cite_openms_; - /// Debug level set by -debug Int debug_level_; private: diff --git a/src/openms/include/OpenMS/CHEMISTRY/ModificationsDB.h b/src/openms/include/OpenMS/CHEMISTRY/ModificationsDB.h index 6d6c9cec722..db4c349ecc3 100644 --- a/src/openms/include/OpenMS/CHEMISTRY/ModificationsDB.h +++ b/src/openms/include/OpenMS/CHEMISTRY/ModificationsDB.h @@ -54,7 +54,7 @@ namespace OpenMS static ModificationsDB* getInstance(); /// Initializes the modification DB with 
non-default modification files (can only be done once) - static ModificationsDB* initializeModificationsDB(OpenMS::String unimod_file = "CHEMISTRY/unimod.xml", OpenMS::String psimod_file = "CHEMISTRY/PSI-MOD.obo", OpenMS::String xlmod_file = "CHEMISTRY/XLMOD.obo"); + static ModificationsDB* initializeModificationsDB(OpenMS::String unimod_file = "CHEMISTRY/unimod.xml", OpenMS::String custommod_file = "CHEMISTRY/custom_mods.xml", OpenMS::String psimod_file = "CHEMISTRY/PSI-MOD.obo", OpenMS::String xlmod_file = "CHEMISTRY/XLMOD.obo"); /// Check whether ModificationsDB was instantiated before static bool isInstantiated(); @@ -248,7 +248,7 @@ namespace OpenMS */ //@{ - explicit ModificationsDB(const OpenMS::String& unimod_file = "CHEMISTRY/unimod.xml", const OpenMS::String& psimod_file = "CHEMISTRY/PSI-MOD.obo", const OpenMS::String& xlmod_file = "CHEMISTRY/XLMOD.obo"); + explicit ModificationsDB(const OpenMS::String& unimod_file = "CHEMISTRY/unimod.xml", const OpenMS::String& custommod_file = "CHEMISTRY/custom_mods.xml", const OpenMS::String& psimod_file = "CHEMISTRY/PSI-MOD.obo", const OpenMS::String& xlmod_file = "CHEMISTRY/XLMOD.obo"); /// Copy constructor ModificationsDB(const ModificationsDB& residue_db); diff --git a/src/openms/include/OpenMS/FEATUREFINDER/FeatureFindingMetabo.h b/src/openms/include/OpenMS/FEATUREFINDER/FeatureFindingMetabo.h index ec5a783d0dc..88910c0bc19 100644 --- a/src/openms/include/OpenMS/FEATUREFINDER/FeatureFindingMetabo.h +++ b/src/openms/include/OpenMS/FEATUREFINDER/FeatureFindingMetabo.h @@ -306,7 +306,8 @@ namespace OpenMS bool enable_RT_filtering_; String isotope_filtering_model_; bool use_smoothed_intensities_; - + bool report_smoothed_intensities_; + bool use_mz_scoring_C13_; bool use_mz_scoring_by_element_range_; bool report_convex_hulls_; diff --git a/src/openms/include/OpenMS/FORMAT/FASTAFile.h b/src/openms/include/OpenMS/FORMAT/FASTAFile.h index 0629cc91860..b20b6927d14 100644 --- a/src/openms/include/OpenMS/FORMAT/FASTAFile.h 
+++ b/src/openms/include/OpenMS/FORMAT/FASTAFile.h @@ -102,6 +102,9 @@ namespace OpenMS */ void readStart(const String& filename); + /// same as readStart(), but does internal progress logging whenever readNextWithProgress() is called + void readStartWithProgress(const String& filename, const String& progress_label); + /** @brief Reads the next FASTA entry from file. If you want to read all entries in one go, use load(). @@ -111,7 +114,11 @@ namespace OpenMS */ bool readNext(FASTAEntry& protein); - /// current stream position + /// same as readNext(), but does internal progress logging; use readStartWithProgress() to enable this + /// Calls progressEnd() when EOF is reached (i.e. when returning false) + bool readNextWithProgress(FASTAEntry& protein); + + /// current stream position when reading a file std::streampos position(); /// is stream at EOF? diff --git a/src/openms/include/OpenMS/FORMAT/MzTab.h b/src/openms/include/OpenMS/FORMAT/MzTab.h index de39cf181d3..babd9cb6b9b 100644 --- a/src/openms/include/OpenMS/FORMAT/MzTab.h +++ b/src/openms/include/OpenMS/FORMAT/MzTab.h @@ -475,6 +475,10 @@ namespace OpenMS const MzTabPSMSectionRows& getPSMSectionRows() const; + /// Returns the number of PSMs in the PSM section (which is not necessarily the number of rows in the section, due to duplication of rows for each protein) + /// @note Relies on the PSM_ID to be set correctly for each PSM row + const size_t getNumberOfPSMs() const; + void setPSMSectionRows(const MzTabPSMSectionRows& psd); const MzTabSmallMoleculeSectionRows& getSmallMoleculeSectionRows() const; diff --git a/src/openms/include/OpenMS/FORMAT/ParamCTDFile.h b/src/openms/include/OpenMS/FORMAT/ParamCTDFile.h index 9d5c4fd1f96..180b0ba0131 100644 --- a/src/openms/include/OpenMS/FORMAT/ParamCTDFile.h +++ b/src/openms/include/OpenMS/FORMAT/ParamCTDFile.h @@ -79,7 +79,7 @@ namespace OpenMS */ static void replace(std::string& replace_in, char to_replace, const std::string& replace_with); - const std::string 
schema_location_ = "/SCHEMAS/Param_1_7_0.xsd"; - const std::string schema_version_ = "1.7.0"; + const std::string schema_location_ = "/SCHEMAS/Param_1_8_0.xsd"; + const std::string schema_version_ = "1.8.0"; }; } diff --git a/src/openms/include/OpenMS/FORMAT/PercolatorInfile.h b/src/openms/include/OpenMS/FORMAT/PercolatorInfile.h index 2e5da34cba2..ccca14828eb 100644 --- a/src/openms/include/OpenMS/FORMAT/PercolatorInfile.h +++ b/src/openms/include/OpenMS/FORMAT/PercolatorInfile.h @@ -30,19 +30,42 @@ namespace OpenMS int min_charge, int max_charge); - /** @brief load pin file and convert to a vector of PeptideIdentification using the given score column @p score_name and orientation @p higher_score_better. - If a decoy prefix is provided, the decoy status is set from the protein accessions. - Otherwise, it assumes that the pin file already contains the correctly annotated decoy status. - If @p extra_scores is not empty, the scores are added to the PeptideHit as MetaValues. - If a filename column is encountered the set of @p filenames is filled in the order of appearance and PeptideIdentifications annotated with the id_merge_index meta value to link them to the filename (similar to a merged idXML file). - TODO: implement something similar to PepXMLFile().setPreferredFixedModifications(getModifications_(fixed_modifications_names)); - **/ + + /** + * @brief Loads peptide identifications from a Percolator input file. + * + * This function reads a Percolator input file (`pin_file`) and returns a vector of `PeptideIdentification` objects. + * It extracts relevant information such as peptide sequences, scores, charges, annotations, and protein accessions, applying + * specified thresholds and handling decoy targets as needed. + * Note: If a filename column is encountered the set of @p filenames is filled in the order of appearance and PeptideIdentifications annotated with the id_merge_index meta value to link them to the filename (similar to a merged idXML file).
+ * + * @param pin_file The path to the Percolator input file with a `.pin` extension. + * + * @param higher_score_better A boolean flag indicating whether higher scores are considered better (`true`) or lower scores are better (`false`). + * + * @param score_name The name of the primary score to be used for ranking peptide hits. + * + * @param extra_scores A list of additional score names that should be extracted and stored in each `PeptideHit`. + * + * @param filenames Will be populated with the unique raw file names extracted from the input data. + * + * @param decoy_prefix The prefix used to identify decoy protein accessions. Proteins with accessions starting with this prefix are marked as decoys. Otherwise, it assumes that the pin file already contains the correctly annotated decoy status. + * @param threshold A double value representing the threshold for the `spectrum_q` value. Only spectra with `spectrum_q` below this threshold are processed. + Implemented to allow prefiltering of Sage results. + * @param SageAnnotation A boolean value used to determine if the pin file is coming from Sage or not + * @return A `std::vector` of `PeptideIdentification` objects containing the peptide identifications. + + * @throws `Exception::ParseError` if any line in the input file does not have the expected number of columns.
+ * TODO: implement something similar to PepXMLFile().setPreferredFixedModifications(getModifications_(fixed_modifications_names)); + */ static std::vector load(const String& pin_file, bool higher_score_better, const String& score_name, const StringList& extra_scores, StringList& filenames, - String decoy_prefix = ""); + String decoy_prefix = "", + double threshold = 0.01, + bool SageAnnotation = false); // uses spectrum_reference, if empty uses spectrum_id, if also empty fall back to using index static String getScanIdentifier(const PeptideIdentification& pid, size_t index); diff --git a/src/openms/include/OpenMS/FORMAT/SwathFile.h b/src/openms/include/OpenMS/FORMAT/SwathFile.h index 9244f00501b..d3b0eef045a 100644 --- a/src/openms/include/OpenMS/FORMAT/SwathFile.h +++ b/src/openms/include/OpenMS/FORMAT/SwathFile.h @@ -92,7 +92,8 @@ namespace OpenMS /// Counts the number of scans in a full Swath file (e.g. concatenated non-split file) void countScansInSwath_(const std::vector& exp, std::vector& swath_counter, int& nr_ms1_spectra, - std::vector& known_window_boundaries); + std::vector& known_window_boundaries, + double TOLERANCE=1e-6); }; } diff --git a/src/openms/include/OpenMS/KERNEL/MSSpectrum.h b/src/openms/include/OpenMS/KERNEL/MSSpectrum.h index de505f73efb..7d88a69385e 100644 --- a/src/openms/include/OpenMS/KERNEL/MSSpectrum.h +++ b/src/openms/include/OpenMS/KERNEL/MSSpectrum.h @@ -151,6 +151,9 @@ namespace OpenMS /// Constructor MSSpectrum(); + /// Constructor from a list of Peak1D, e.g. MSSpectrum spec{ {mz1, int1}, {mz2, int2}, ... 
}; + MSSpectrum(const std::initializer_list& init); + /// Copy constructor MSSpectrum(const MSSpectrum& source); @@ -602,16 +605,16 @@ namespace OpenMS protected: /// Retention time - double retention_time_; + double retention_time_ = -1; /// Drift time - double drift_time_; + double drift_time_ = -1; /// Drift time unit - DriftTimeUnit drift_time_unit_; + DriftTimeUnit drift_time_unit_ = DriftTimeUnit::NONE; /// MS level - UInt ms_level_; + UInt ms_level_ = 1; /// Name String name_; diff --git a/src/openms/include/OpenMS/SYSTEM/File.h b/src/openms/include/OpenMS/SYSTEM/File.h index b304586a84f..4013e9e84d5 100644 --- a/src/openms/include/OpenMS/SYSTEM/File.h +++ b/src/openms/include/OpenMS/SYSTEM/File.h @@ -113,6 +113,9 @@ namespace OpenMS enum class CopyOptions {OVERWRITE,SKIP,CANCEL}; static bool copyDirRecursively(const QString &from_dir, const QString &to_dir, File::CopyOptions option = CopyOptions::OVERWRITE); + /// Copy a file (if it exists). Returns true if successful. + static bool copy(const String& from, const String& to); + /** @brief Removes a file (if it exists). @@ -126,6 +129,10 @@ namespace OpenMS /// Removes the directory and all subdirectories (absolute path). static bool removeDir(const QString& dir_name); + /// Creates a directory (absolute path or relative to the current working dir), even if subdirectories do not exist. Returns true if successful. + /// If the path already exists when this function is called, it will return true. + static bool makeDir(const String& dir_name); + /// Replaces the relative path in the argument with the absolute path. 
static String absolutePath(const String& file); diff --git a/src/openms/source/ANALYSIS/ID/AScore.cpp b/src/openms/source/ANALYSIS/ID/AScore.cpp index f310d07b533..3f1368b99d6 100644 --- a/src/openms/source/ANALYSIS/ID/AScore.cpp +++ b/src/openms/source/ANALYSIS/ID/AScore.cpp @@ -55,18 +55,19 @@ namespace OpenMS } String sequence_str = phospho.getSequence().toString(); + String unmodified_sequence_str = phospho.getSequence().toUnmodifiedString(); Size number_of_phosphorylation_events = numberOfPhosphoEvents_(sequence_str); AASequence seq_without_phospho = removePhosphositesFromSequence_(sequence_str); - if ((max_peptide_length_ > 0) && (seq_without_phospho.toUnmodifiedString().size() > max_peptide_length_)) + if ((max_peptide_length_ > 0) && (unmodified_sequence_str.size() > max_peptide_length_)) { OPENMS_LOG_DEBUG << "\tcalculation aborted: peptide too long: " << seq_without_phospho.toString() << std::endl; return phospho; } // determine all phospho sites - vector sites = getSites_(seq_without_phospho); + vector sites = getSites_(unmodified_sequence_str); Size number_of_STY = sites.size(); if (number_of_phosphorylation_events == 0 || number_of_STY == 0) @@ -215,7 +216,7 @@ namespace OpenMS double pow1 = pow((double)p, (int)k); double pow2 = pow(double(1 - p), double(N - k)); - + score += coeff * pow1 * pow2; } @@ -381,10 +382,9 @@ namespace OpenMS / 7.0; } - vector AScore::getSites_(const AASequence& without_phospho) const + vector AScore::getSites_(const String& unmodified) const { vector tupel; - String unmodified = without_phospho.toUnmodifiedString(); for (Size i = 0; i < unmodified.size(); ++i) { if (unmodified[i] == 'Y' || unmodified[i] == 'T' || unmodified[i] == 'S') @@ -501,7 +501,7 @@ namespace OpenMS } return th_spectra; } - + std::vector AScore::peakPickingPerWindowsInSpectrum_(PeakSpectrum& real_spectrum) const { vector windows_top10; diff --git a/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp b/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp new file 
mode 100644 index 00000000000..d1b480e94d5 --- /dev/null +++ b/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp @@ -0,0 +1,177 @@ +// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Chris Bielow, Philipp Wang $ +// $Authors: Chris Bielow, Philipp Wang $ +// -------------------------------------------------------------------------- +#include +#include +#include +#include + +#include + +using namespace OpenMS; +using namespace std; + + +NeighborSeq::NeighborSeq(std::vector&& digested_relevant_peptides) + : digested_relevant_peptides_(std::move(digested_relevant_peptides)), + neighbor_stats_(digested_relevant_peptides_.size(), 0) +{ + Param params; + params.setValue("add_b_ions", "true"); + params.setValue("add_y_ions", "true"); + params.setValue("add_first_prefix_ion", "true"); // do not skip b1 ion + spec_gen_.setParameters(params); + + x_residue_ = ResidueDB::getInstance()->getResidue('X'); + + // Index peptide masses for fast lookup + mass_position_map_ = createMassLookup_(); +} + +// Function to generate the theoretical spectrum for a given peptide sequence +MSSpectrum NeighborSeq::generateSpectrum(const AASequence& peptide_sequence) +{ + MSSpectrum spectrum; + spec_gen_.getSpectrum(spectrum, peptide_sequence, 1, 1); + return spectrum; +} + +int NeighborSeq::computeSharedIonCount(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& mz_bin_size) +{ + // compute shared b/y ions in two sorted ranges + auto setIntersectionCount = [mz_bin_size](auto first1, auto last1, auto first2, auto last2) -> Size + { + Size count {0}; + while (first1 != last1 && first2 != last2) + { + auto val1 = int(first1->getMZ() / mz_bin_size); + auto val2 = int(first2->getMZ() / mz_bin_size); + if (val1 < val2) ++first1; + else + { + if (val1 == val2) + { + ++first1; + ++count; + } + ++first2; + } + } + 
return count; + }; + + auto shared_ions = setIntersectionCount(spec1.begin(), spec1.end(), spec2.begin(), spec2.end()); + + return shared_ions; +} + +// Function to compare two spectra and determine if they are similar +bool NeighborSeq::isNeighborSpectrum(const MSSpectrum& spec1, const MSSpectrum& spec2, const double min_shared_ion_fraction, const double mz_bin_size) +{ + // Calculate the number of shared bins considering the bin frequencies + int B12 = computeSharedIonCount(spec1, spec2, mz_bin_size); + + // Calculate the fraction of shared bins + double fraction_shared = (2.0 * B12) / (spec1.size() + spec2.size()); + + return fraction_shared > min_shared_ion_fraction; +} + +//Finds candidate positions based on a given mono-isotopic weight and mass tolerance. +auto NeighborSeq::findCandidatePositions_(const double mono_weight, double mass_tolerance, const bool mass_tolerance_pc_ppm) +{ + // Calculate the lower and upper bounds for the mass tolerance range + assert(mass_tolerance >= 0); + if (mass_tolerance_pc_ppm) + { + mass_tolerance = Math::ppmToMass(mono_weight, mass_tolerance); + } + + // Find the lower bound iterator in the map + auto lower = mass_position_map_.lower_bound(mono_weight - mass_tolerance); + + // Find the upper bound iterator in the map + auto upper = mass_position_map_.upper_bound(mono_weight + mass_tolerance); + + return make_pair(lower, upper); +} + +// Method to find neighbor peptides in a given FASTA file +bool NeighborSeq::isNeighborPeptide(const AASequence& peptide, + const double mass_tolerance_pc, + const bool mass_tolerance_pc_ppm, + const double min_shared_ion_fraction, + const double mz_bin_size) + +{ + auto [from, to] = findCandidatePositions_(peptide.getMonoWeight(), mass_tolerance_pc, mass_tolerance_pc_ppm); + if (from == to) return false; + + bool found = false; + MSSpectrum spec = generateSpectrum(peptide); + for (auto it_rel_pep = from; it_rel_pep != to; ++it_rel_pep) + { + for (int pep_index : it_rel_pep->second) + { + 
MSSpectrum neighbor_spec = generateSpectrum(digested_relevant_peptides_[pep_index]); + if (isNeighborSpectrum(spec, neighbor_spec, min_shared_ion_fraction, mz_bin_size)) + { + //std::cout << digested_relevant_peptides_[pep_index] << " has neighbor " << peptide << '\n'; + neighbor_stats_[pep_index]++; + found = true; + } + } + } + return found; +} + +map> NeighborSeq::createMassLookup_() +{ + // Map to store the mass and corresponding positions + map> mass_position_map; + + int skipped{0}; + // Iterate through the vector of AASequence objects + for (size_t i = 0; i < digested_relevant_peptides_.size(); ++i) + { + if (digested_relevant_peptides_[i].has(*x_residue_)) + { + neighbor_stats_[i] = -1; // mark as not findable + skipped++; + continue; + } + // Calculate the mono-isotopic mass of the sequence + double mass = digested_relevant_peptides_[i].getMonoWeight(); + + // Insert the mass and the position into the map + mass_position_map[mass].push_back(i); + } + OPENMS_LOG_WARN << "Skipped " << skipped << "/" << digested_relevant_peptides_.size() + << " peptides with unknown('X') amino acids." 
<< endl; + return mass_position_map; +} + +NeighborSeq::NeighborStats NeighborSeq::getNeighborStats() const +{ + NeighborStats stats; + for (int count : neighbor_stats_) + { + if (count == -1) + stats.unfindable_peptides++; + else if (count == 0) + stats.findable_no_neighbors++; + else if (count == 1) + stats.findable_one_neighbor++; + else + stats.findable_multiple_neighbors++; + } + return stats; +} + + + + diff --git a/src/openms/source/ANALYSIS/ID/sources.cmake b/src/openms/source/ANALYSIS/ID/sources.cmake index 41cc05a6d59..de1522abfb9 100644 --- a/src/openms/source/ANALYSIS/ID/sources.cmake +++ b/src/openms/source/ANALYSIS/ID/sources.cmake @@ -34,6 +34,7 @@ IonIdentityMolecularNetworking.cpp MessagePasserFactory.cpp MetaboliteSpectralMatching.cpp MorpheusScore.cpp +NeighborSeq.cpp PeptideProteinResolution.cpp PeptideIndexing.cpp PercolatorFeatureSetHelper.cpp diff --git a/src/openms/source/ANALYSIS/QUANTITATION/IsobaricIsotopeCorrector.cpp b/src/openms/source/ANALYSIS/QUANTITATION/IsobaricIsotopeCorrector.cpp index 02d81f8d656..6a3051f6b79 100644 --- a/src/openms/source/ANALYSIS/QUANTITATION/IsobaricIsotopeCorrector.cpp +++ b/src/openms/source/ANALYSIS/QUANTITATION/IsobaricIsotopeCorrector.cpp @@ -43,14 +43,10 @@ namespace OpenMS { OPENMS_LOG_DEBUG << "Correction matrix is the identity matrix." << std::endl; OPENMS_LOG_DEBUG << correction_matrix << std::endl; - - // workaround: TMT11plex has a special case where the correction matrix is the identity matrix - if (quant_method->getMethodName() != "tmt11plex") - { - throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, + + throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "IsobaricIsotopeCorrector: The given isotope correction matrix is an identity matrix leading to no correction. 
" - "Please provide a valid isotope_correction matrix as it was provided with the sample kit!"); - } + "Please provide a valid isotope_correction matrix as it was provided with the sample kit!"); } Eigen::FullPivLU ludecomp(correction_matrix.getEigenMatrix()); diff --git a/src/openms/source/ANALYSIS/QUANTITATION/TMTElevenPlexQuantitationMethod.cpp b/src/openms/source/ANALYSIS/QUANTITATION/TMTElevenPlexQuantitationMethod.cpp index 713b1c2b0a6..724fb5903cd 100644 --- a/src/openms/source/ANALYSIS/QUANTITATION/TMTElevenPlexQuantitationMethod.cpp +++ b/src/openms/source/ANALYSIS/QUANTITATION/TMTElevenPlexQuantitationMethod.cpp @@ -75,18 +75,21 @@ void TMTElevenPlexQuantitationMethod::setDefaultParams_() defaults_.setValue("reference_channel", "126", "The reference channel (126, 127N, 127C, 128N, 128C, 129N, 129C, 130N, 130C, 131N, 131C)."); defaults_.setValidStrings("reference_channel", TMTElevenPlexQuantitationMethod::channel_names_); - defaults_.setValue("correction_matrix", std::vector{"0.0/0.0/0.0/0.0", - "0.0/0.0/0.0/0.0", - "0.0/0.0/0.0/0.0", - "0.0/0.0/0.0/0.0", - "0.0/0.0/0.0/0.0", - "0.0/0.0/0.0/0.0", - "0.0/0.0/0.0/0.0", - "0.0/0.0/0.0/0.0", - "0.0/0.0/0.0/0.0", - "0.0/0.0/0.0/0.0", - "0.0/0.0/0.0/0.0"}, - "Correction matrix for isotope distributions (see documentation); use the following format: <-2Da>/<-1Da>/<+1Da>/<+2Da>; e.g. '0/0.3/4/0', '0.1/0.3/3/0.2'"); + // default: Product Number: A37725 Lot Number: ZF395505 + defaults_.setValue("correction_matrix", std::vector{ + "0.0/0.0/8.6/0.3", + "0.0/0.1/7.8/0.1", + "0.0/0.8/6.9/0.1", + "0.0/7.4/7.4/0.0", + "0.0/1.5/6.2/0.2", + "0.0/1.5/5.7/0.1", + "0.0/2.6/4.8/0.0", + "0.0/2.2/4.6/0.0", + "0.0/2.8/4.5/0.1", + "0.1/2.9/3.8/0.0", + "0.0/3.9/2.8/0.0" + }, + "Correction matrix for isotope distributions (see documentation); use the following format: <-2Da>/<-1Da>/<+1Da>/<+2Da>; e.g. 
'0/0.3/4/0', '0.1/0.3/3/0.2'"); defaultsToParam_(); } diff --git a/src/openms/source/ANALYSIS/QUANTITATION/TMTSixPlexQuantitationMethod.cpp b/src/openms/source/ANALYSIS/QUANTITATION/TMTSixPlexQuantitationMethod.cpp index fb8246a3b50..a74ff12ded4 100644 --- a/src/openms/source/ANALYSIS/QUANTITATION/TMTSixPlexQuantitationMethod.cpp +++ b/src/openms/source/ANALYSIS/QUANTITATION/TMTSixPlexQuantitationMethod.cpp @@ -45,17 +45,16 @@ namespace OpenMS defaults_.setMinInt("reference_channel", 126); defaults_.setMaxInt("reference_channel", 131); - // {0.0, 1.0, 5.9, 0.2}, //114 - // {0.0, 2.0, 5.6, 0.1}, - // {0.0, 3.0, 4.5, 0.1}, - // {0.1, 4.0, 3.5, 0.1} //117 - defaults_.setValue("correction_matrix", std::vector{"0.0/0.0/0.0/0.0", - "0.0/0.0/0.0/0.0", - "0.0/0.0/0.0/0.0", - "0.0/0.0/0.0/0.0", - "0.0/0.0/0.0/0.0", - "0.0/0.0/0.0/0.0"}, - "Correction matrix for isotope distributions (see documentation); use the following format: <-2Da>/<-1Da>/<+1Da>/<+2Da>; e.g. '0/0.3/4/0', '0.1/0.3/3/0.2'"); + // default: Product Number: 90061 Lot Number: ZE386964 + defaults_.setValue("correction_matrix", std::vector{ + "0.0/0.0/8.6/0.3", + "0.0/0.1/7.8/0.1", + "0.0/1.5/6.2/0.2", + "0.0/1.5/5.7/0.1", + "0.0/3.1/3.6/0.0", + "0.1/2.9/3.8/0.0" + }, + "Correction matrix for isotope distributions (see documentation); use the following format: <-2Da>/<-1Da>/<+1Da>/<+2Da>; e.g. 
'0/0.3/4/0', '0.1/0.3/3/0.2'"); defaultsToParam_(); } diff --git a/src/openms/source/APPLICATIONS/TOPPBase.cpp b/src/openms/source/APPLICATIONS/TOPPBase.cpp index 13728ac837b..d1b3a92f2d5 100755 --- a/src/openms/source/APPLICATIONS/TOPPBase.cpp +++ b/src/openms/source/APPLICATIONS/TOPPBase.cpp @@ -29,8 +29,6 @@ #include #include #include -#include - #include #include @@ -43,9 +41,6 @@ #include #include -#include - -#include #include @@ -69,7 +64,7 @@ namespace OpenMS using namespace Exception; String TOPPBase::topp_ini_file_ = String(QDir::homePath()) + "/.TOPP.ini"; - const Citation TOPPBase::cite_openms_ + const Citation TOPPBase::cite_openms = {"Pfeuffer, J., Bielow, C., Wein, S. et al.", "OpenMS 3 enables reproducible analysis of large-scale mass spectrometry data", "Nat Methods (2024)", "10.1038/s41592-024-02197-7"}; @@ -571,7 +566,7 @@ namespace OpenMS << bright("Full documentation: ") << underline(docurl) // the space is needed, otherwise the remaining line will be underlined on Windows.. 
<< "\n" << bright("Version: ") << verboseVersion_ << "\n" - << bright("To cite OpenMS:\n") << " + " << is.indent(3) << cite_openms_.toString() + << bright("To cite OpenMS:\n") << " + " << is.indent(3) << cite_openms.toString() << is.indent(0) << "\n"; if (!citations_.empty()) { @@ -702,27 +697,28 @@ namespace OpenMS case ParameterInformation::INPUT_FILE: case ParameterInformation::OUTPUT_FILE: case ParameterInformation::OUTPUT_PREFIX: + case ParameterInformation::OUTPUT_DIR: case ParameterInformation::STRINGLIST: case ParameterInformation::INPUT_FILE_LIST: case ParameterInformation::OUTPUT_FILE_LIST: if (!it->valid_strings.empty()) { StringList copy = it->valid_strings; - for (StringList::iterator str_it = copy.begin(); - str_it != copy.end(); ++str_it) + for (auto& str : copy) { - str_it->quote('\''); + str.quote('\''); } String add = ""; if (it->type == ParameterInformation::INPUT_FILE || it->type == ParameterInformation::OUTPUT_FILE || it->type == ParameterInformation::OUTPUT_PREFIX + || it->type == ParameterInformation::OUTPUT_DIR || it->type == ParameterInformation::INPUT_FILE_LIST || it->type == ParameterInformation::OUTPUT_FILE_LIST) add = " formats"; - restrictions.push_back(String("valid") + add + ": " + ListUtils::concatenate(copy, ", ")); // concatenate restrictions by comma + restrictions.push_back("valid" + add + ": " + ListUtils::concatenate(copy, ", ")); // concatenate restrictions by comma } break; @@ -830,13 +826,11 @@ namespace OpenMS return ParameterInformation(name, ParameterInformation::FLAG, "", "", entry.description, false, advanced); } - bool input_file = entry.tags.count("input file"); - bool output_file = entry.tags.count("output file"); - bool output_prefix = entry.tags.count("output prefix"); - if (input_file && output_file) - { - throw InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Parameter '" + full_name + "' marked as both input and output file"); - } + const bool input_file = entry.tags.count(TAG_INPUT_FILE); + const 
bool output_file = entry.tags.count(TAG_OUTPUT_FILE); + const bool output_prefix = entry.tags.count(TAG_OUTPUT_PREFIX); + const bool output_dir = entry.tags.count(TAG_OUTPUT_DIR); + assert(input_file + output_file + output_prefix + output_dir <= 1); // at most one of these should be true (or none) enum ParameterInformation::ParameterTypes type = ParameterInformation::NONE; switch (entry.value.valueType()) { @@ -847,6 +841,8 @@ namespace OpenMS type = ParameterInformation::OUTPUT_FILE; else if (output_prefix) type = ParameterInformation::OUTPUT_PREFIX; + else if (output_dir) + type = ParameterInformation::OUTPUT_DIR; else type = ParameterInformation::STRING; break; @@ -1035,7 +1031,7 @@ namespace OpenMS void TOPPBase::setValidFormats_(const String& name, const std::vector& formats, const bool force_OpenMS_format) { - //check if formats are known + // check if formats are known if (force_OpenMS_format) { for (const auto& f : formats) @@ -1053,14 +1049,15 @@ namespace OpenMS ParameterInformation& p = getParameterByName_(name); - //check if the type matches + // check if the type matches if (p.type != ParameterInformation::INPUT_FILE && p.type != ParameterInformation::OUTPUT_FILE && p.type != ParameterInformation::INPUT_FILE_LIST && p.type != ParameterInformation::OUTPUT_FILE_LIST && p.type != ParameterInformation::OUTPUT_PREFIX) + // && p.type != ParameterInformation::OUTPUT_DIR ) // output dir is not a file format, hence does not support restricting the format { - throw ElementNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, name); + throw Exception::WrongParameterType(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, name); } if (!p.valid_strings.empty()) @@ -1074,7 +1071,7 @@ namespace OpenMS { ParameterInformation& p = getParameterByName_(name); - //check if the type matches + // check if the type matches if (p.type != ParameterInformation::INT && p.type != ParameterInformation::INTLIST) { throw ElementNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, name); @@ 
-1192,6 +1189,13 @@ namespace OpenMS throw InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Registering a required OutputPrefix param (" + name + ") with a non-empty default is forbidden!", default_value); parameters_.emplace_back(name, ParameterInformation::OUTPUT_PREFIX, argument, default_value, description, required, advanced); } + + void TOPPBase::registerOutputDir_(const String& name, const String& argument, const String& default_value, const String& description, bool required, bool advanced) + { + if (required && !default_value.empty()) + throw InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Registering a required OutputDir param (" + name + ") with a non-empty default is forbidden!", default_value); + parameters_.emplace_back(name, ParameterInformation::OUTPUT_DIR, argument, default_value, description, required, advanced); + } void TOPPBase::registerDoubleOption_(const String& name, const String& argument, double default_value, const String& description, bool required, bool advanced) { @@ -1315,6 +1319,28 @@ namespace OpenMS return tmp; } + String TOPPBase::getOutputDirOption(const String& name) const + { + const ParameterInformation& p = findEntry_(name); + if (p.type != ParameterInformation::OUTPUT_DIR) + { + throw WrongParameterType(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, name); + } + if (p.required && (getParam_(name).isEmpty() || getParam_(name) == "")) + { + String message = "'" + name + "'"; + if (! 
p.valid_strings.empty()) { message += " [valid: " + ListUtils::concatenate(p.valid_strings, ", ") + "]"; } + throw RequiredParameterNotGiven(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, message); + } + String tmp = getParamAsString_(name, p.default_value.toString()); + writeDebug_(String("Value of string(outdir) option '") + name + "': " + tmp, 1); + + // create directory if it does not exist + File::makeDir(tmp); + + return tmp; + } + double TOPPBase::getDoubleOption_(const String& name) const { const ParameterInformation& p = findEntry_(name); @@ -2074,7 +2100,7 @@ namespace OpenMS if (it->type == ParameterInformation::INPUT_FILE || it->type == ParameterInformation::INPUT_FILE_LIST) { - tags.emplace_back("input file"); + tags.emplace_back(TAG_INPUT_FILE); } if (it->type == ParameterInformation::INPUT_FILE && std::find(it->tags.begin(), it->tags.end(), "is_executable") != it->tags.end()) @@ -2082,15 +2108,9 @@ namespace OpenMS tags.emplace_back("is_executable"); } - if (it->type == ParameterInformation::OUTPUT_FILE || it->type == ParameterInformation::OUTPUT_FILE_LIST) - { - tags.emplace_back("output file"); - } - - if (it->type == ParameterInformation::OUTPUT_PREFIX) - { - tags.emplace_back("output prefix"); - } + if (it->type == ParameterInformation::OUTPUT_FILE || it->type == ParameterInformation::OUTPUT_FILE_LIST) { tags.emplace_back(TAG_OUTPUT_FILE); } + if (it->type == ParameterInformation::OUTPUT_PREFIX) { tags.emplace_back(TAG_OUTPUT_PREFIX); } + if (it->type == ParameterInformation::OUTPUT_DIR) { tags.emplace_back(TAG_OUTPUT_DIR); } switch (it->type) { @@ -2105,41 +2125,29 @@ namespace OpenMS case ParameterInformation::INPUT_FILE: case ParameterInformation::OUTPUT_FILE: case ParameterInformation::OUTPUT_PREFIX: + case ParameterInformation::OUTPUT_DIR: tmp.setValue(name, (String)it->default_value.toString(), it->description, tags); if (!it->valid_strings.empty()) { StringList vss_tmp = it->valid_strings; - std::vector vss; - foreach(std::string vs, vss_tmp) 
+ for (auto& vs : vss_tmp) { - vss.push_back("*." + vs); + vs = "*." + vs; } - tmp.setValidStrings(name, vss); + tmp.setValidStrings(name, ListUtils::create(vss_tmp)); } break; case ParameterInformation::DOUBLE: tmp.setValue(name, it->default_value, it->description, tags); - if (it->min_float != -std::numeric_limits::max()) - { - tmp.setMinFloat(name, it->min_float); - } - if (it->max_float != std::numeric_limits::max()) - { - tmp.setMaxFloat(name, it->max_float); - } + tmp.setMinFloat(name, it->min_float); + tmp.setMaxFloat(name, it->max_float); break; case ParameterInformation::INT: tmp.setValue(name, (Int)it->default_value, it->description, tags); - if (it->min_int != -std::numeric_limits::max()) - { - tmp.setMinInt(name, it->min_int); - } - if (it->max_int != std::numeric_limits::max()) - { - tmp.setMaxInt(name, it->max_int); - } + tmp.setMinInt(name, it->min_int); + tmp.setMaxInt(name, it->max_int); break; case ParameterInformation::FLAG: @@ -2402,7 +2410,7 @@ namespace OpenMS // collect citation information std::vector citation_dois; citation_dois.reserve(citations_.size() + 1); - citation_dois.push_back(cite_openms_.doi); + citation_dois.push_back(cite_openms.doi); for (auto& citation : citations_) { citation_dois.push_back(citation.doi); @@ -2494,6 +2502,7 @@ namespace OpenMS case ParameterInformation::INPUT_FILE: case ParameterInformation::OUTPUT_FILE: case ParameterInformation::OUTPUT_PREFIX: + case ParameterInformation::OUTPUT_DIR: if (queue.empty()) value = std::string(); else diff --git a/src/openms/source/CHEMISTRY/ModificationsDB.cpp b/src/openms/source/CHEMISTRY/ModificationsDB.cpp index 20d23bd57c2..502d7d226ab 100644 --- a/src/openms/source/CHEMISTRY/ModificationsDB.cpp +++ b/src/openms/source/CHEMISTRY/ModificationsDB.cpp @@ -59,7 +59,7 @@ namespace OpenMS return db_; } - ModificationsDB* ModificationsDB::initializeModificationsDB(OpenMS::String unimod_file, OpenMS::String psimod_file, OpenMS::String xlmod_file) + ModificationsDB* 
ModificationsDB::initializeModificationsDB(OpenMS::String unimod_file, OpenMS::String custommod_file, OpenMS::String psimod_file, OpenMS::String xlmod_file) { // Currently its not possible to check for double initialization since getInstance() also calls this function. // if (is_instantiated_) @@ -67,17 +67,22 @@ namespace OpenMS // throw Exception::FailedAPICall(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Cannot initialize ModificationsDB twice"); // } - static ModificationsDB* db_ = new ModificationsDB(std::move(unimod_file), std::move(psimod_file), std::move(xlmod_file)); + static ModificationsDB* db_ = new ModificationsDB(std::move(unimod_file), std::move(custommod_file), std::move(psimod_file), std::move(xlmod_file)); return db_; } - ModificationsDB::ModificationsDB(const OpenMS::String& unimod_file, const OpenMS::String& psimod_file, const OpenMS::String& xlmod_file) + ModificationsDB::ModificationsDB(const OpenMS::String& unimod_file, const OpenMS::String& custommod_file, const OpenMS::String& psimod_file, const OpenMS::String& xlmod_file) { if (!unimod_file.empty()) { readFromUnimodXMLFile(unimod_file); } + if(!custommod_file.empty()) + { + readFromUnimodXMLFile(custommod_file); + } + if (!psimod_file.empty()) { readFromOBOFile(psimod_file); diff --git a/src/openms/source/CONCEPT/ClassTest.cpp b/src/openms/source/CONCEPT/ClassTest.cpp index eb744beae87..a66d06ea7aa 100644 --- a/src/openms/source/CONCEPT/ClassTest.cpp +++ b/src/openms/source/CONCEPT/ClassTest.cpp @@ -99,8 +99,8 @@ namespace OpenMS::Internal::ClassTest if (TEST::infile.good() && TEST::templatefile.good()) { - std::string TEST_FILE__template_line; - std::string TEST_FILE__line; + String TEST_FILE__template_line; + String TEST_FILE__line; while (TEST::infile.good() && TEST::templatefile.good()) { @@ -108,14 +108,15 @@ namespace OpenMS::Internal::ClassTest TEST_FILE__template_line = TEST::line_buffer; TEST::infile.getline(TEST::line_buffer, 65535); TEST_FILE__line = TEST::line_buffer; - - 
TEST::equal_files &= (TEST_FILE__template_line == TEST_FILE__line); + TEST_FILE__template_line.trim(); // remove leading and trailing whitespaces (ignore CR/LF line endings on Unix) + TEST_FILE__line.trim(); // remove leading and trailing whitespaces (ignore CR/LF line endings on Unix) if (TEST_FILE__template_line != TEST_FILE__line) { - TEST::initialNewline(); - stdcout << " TEST_FILE_EQUAL: line mismatch:\n got: '" - << TEST_FILE__line << "'\n expected: '" - << TEST_FILE__template_line << "'\n"; + TEST::equal_files = false; + TEST::initialNewline(); + stdcout << " TEST_FILE_EQUAL: line mismatch:\n got: '" + << TEST_FILE__line << "'\n expected: '" + << TEST_FILE__template_line << "'\n"; } } } diff --git a/src/openms/source/DATASTRUCTURES/DataValue.cpp b/src/openms/source/DATASTRUCTURES/DataValue.cpp index 74f55678d55..687ee531fff 100644 --- a/src/openms/source/DATASTRUCTURES/DataValue.cpp +++ b/src/openms/source/DATASTRUCTURES/DataValue.cpp @@ -640,9 +640,9 @@ namespace OpenMS return ParamValue(v); } default: - throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Type of DataValue is unkown!"); + throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Type of DataValue is unknown!"); } - throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Type of DataValue is unkown!"); + throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Type of DataValue is unknown!"); } DataValue::operator std::string() const diff --git a/src/openms/source/FEATUREFINDER/FeatureFindingMetabo.cpp b/src/openms/source/FEATUREFINDER/FeatureFindingMetabo.cpp index c41609a775d..f55df8bea8b 100644 --- a/src/openms/source/FEATUREFINDER/FeatureFindingMetabo.cpp +++ b/src/openms/source/FEATUREFINDER/FeatureFindingMetabo.cpp @@ -265,6 +265,8 @@ namespace OpenMS defaults_.setValue("use_smoothed_intensities", "true", "Use LOWESS intensities instead of raw intensities.", {"advanced"}); 
defaults_.setValidStrings("use_smoothed_intensities", {"false","true"}); + defaults_.setValue("report_smoothed_intensities", "true", "Report smoothed intensities (only if use_smoothed_intensities is true).", {"advanced"}); + defaults_.setValidStrings("report_smoothed_intensities", {"false","true"}); defaults_.setValue("report_convex_hulls", "false", "Augment each reported feature with the convex hull of the underlying mass traces (increases featureXML file size considerably)."); defaults_.setValidStrings("report_convex_hulls", {"false","true"}); @@ -307,6 +309,14 @@ namespace OpenMS isotope_filtering_model_ = param_.getValue("isotope_filtering_model").toString(); use_smoothed_intensities_ = param_.getValue("use_smoothed_intensities").toBool(); + bool use_smoothed = param_.getValue("use_smoothed_intensities").toBool(); + bool report_smoothed = param_.getValue("report_smoothed_intensities").toBool(); + if (report_smoothed && !use_smoothed) { + OPENMS_LOG_WARN << "Warning: 'report_smoothed_intensities' is set to true, but 'use_smoothed_intensities' is false. Ignoring 'report_smoothed_intensities'." 
<< std::endl; + report_smoothed = false; + } + use_smoothed_intensities_ = use_smoothed; + report_smoothed_intensities_ = report_smoothed; use_mz_scoring_C13_ = param_.getValue("mz_scoring_13C").toBool(); report_convex_hulls_ = param_.getValue("report_convex_hulls").toBool(); @@ -1002,20 +1012,20 @@ namespace OpenMS if (report_summed_ints_) { - f.setIntensity(feat_hypos[hypo_idx].getSummedFeatureIntensity(use_smoothed_intensities_)); + f.setIntensity(feat_hypos[hypo_idx].getSummedFeatureIntensity(report_smoothed_intensities_)); } else { - f.setIntensity(feat_hypos[hypo_idx].getMonoisotopicFeatureIntensity(use_smoothed_intensities_)); + f.setIntensity(feat_hypos[hypo_idx].getMonoisotopicFeatureIntensity(report_smoothed_intensities_)); } f.setWidth(feat_hypos[hypo_idx].getFWHM()); f.setCharge(feat_hypos[hypo_idx].getCharge()); f.setMetaValue(3, feat_hypos[hypo_idx].getLabel()); - f.setMetaValue("max_height", feat_hypos[hypo_idx].getMaxIntensity(use_smoothed_intensities_)); + f.setMetaValue("max_height", feat_hypos[hypo_idx].getMaxIntensity(report_smoothed_intensities_)); // store isotope intensities - std::vector all_ints(feat_hypos[hypo_idx].getAllIntensities(use_smoothed_intensities_)); + std::vector all_ints(feat_hypos[hypo_idx].getAllIntensities(report_smoothed_intensities_)); f.setMetaValue(Constants::UserParam::NUM_OF_MASSTRACES, all_ints.size()); if (report_convex_hulls_) f.setConvexHulls(feat_hypos[hypo_idx].getConvexHulls()); f.setOverallQuality(feat_hypos[hypo_idx].getScore()); diff --git a/src/openms/source/FORMAT/FASTAFile.cpp b/src/openms/source/FORMAT/FASTAFile.cpp index b771bdccf7f..52d8d1cb18a 100644 --- a/src/openms/source/FORMAT/FASTAFile.cpp +++ b/src/openms/source/FORMAT/FASTAFile.cpp @@ -146,6 +146,12 @@ namespace OpenMS entries_read_ = 0; } + void FASTAFile::readStartWithProgress(const String& filename, const String& progress_label) + { + readStart(filename); + startProgress(0, fileSize_, progress_label); + } + bool 
FASTAFile::readNext(FASTAEntry &protein) { if (infile_.eof()) @@ -176,9 +182,25 @@ namespace OpenMS protein.description = std::move(description_); protein.sequence = std::move(seq_); + setProgress(infile_.tellg()); + return true; } + bool FASTAFile::readNextWithProgress(FASTAEntry& protein) + { + if (readNext(protein)) + { + setProgress(position()); + return true; + } + else + { + endProgress(); + return false; + } + } + std::streampos FASTAFile::position() { return infile_.tellg(); @@ -233,7 +255,7 @@ namespace OpenMS void FASTAFile::writeNext(const FASTAEntry &protein) { - outfile_ << ">" << protein.identifier << " " << protein.description << "\n"; + outfile_ << '>' << protein.identifier << ' ' << protein.description << "\n"; const String &tmp(protein.sequence); int chunks(tmp.size() / 80); // number of complete chunks diff --git a/src/openms/source/FORMAT/MzTab.cpp b/src/openms/source/FORMAT/MzTab.cpp index f234128b56c..eaf93908365 100644 --- a/src/openms/source/FORMAT/MzTab.cpp +++ b/src/openms/source/FORMAT/MzTab.cpp @@ -392,6 +392,16 @@ namespace OpenMS return psm_data_; } + const size_t MzTab::getNumberOfPSMs() const + { + std::unordered_set psm_ids; + for (const auto& psm : psm_data_) + { + psm_ids.insert(psm.PSM_ID.get()); + } + return psm_ids.size(); + } + void MzTab::setPSMSectionRows(const MzTabPSMSectionRows& psd) { psm_data_ = psd; diff --git a/src/openms/source/FORMAT/MzTabBase.cpp b/src/openms/source/FORMAT/MzTabBase.cpp index c67df8e4a85..0298bc92a0a 100644 --- a/src/openms/source/FORMAT/MzTabBase.cpp +++ b/src/openms/source/FORMAT/MzTabBase.cpp @@ -668,21 +668,18 @@ namespace OpenMS { String lower = s; lower.toLower().trim(); - if (lower == "null") - { - setNull(true); - } - else if (lower == "nan") - { - setNaN(); - } - else if (lower == "inf") - { - setInf(); - } + if (lower == "null") { setNull(true); } + else if (lower == "nan") { setNaN(); } + else if (lower == "inf") { setInf(); } else // default case { - set(lower.toInt()); + // some mzTab 
files from external sources contain floating point numbers in integer columns + auto val = lower.toDouble(); + if (val != (Int)val) // check if the value is actually an integer (e.g. 4.0) + { + throw Exception::ConversionError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("Could not convert String '") + s + "' to MzTabInteger"); + } + set((Int)val); } } diff --git a/src/openms/source/FORMAT/MzTabFile.cpp b/src/openms/source/FORMAT/MzTabFile.cpp index a5a84d46320..91c153f1fd7 100644 --- a/src/openms/source/FORMAT/MzTabFile.cpp +++ b/src/openms/source/FORMAT/MzTabFile.cpp @@ -1665,22 +1665,22 @@ namespace OpenMS for (map::const_iterator it = md.contact.begin(); it != md.contact.end(); ++it) { - const MzTabContactMetaData & md = it->second; - if (!md.name.isNull()) + const MzTabContactMetaData & mdc = it->second; + if (!mdc.name.isNull()) { - String s = "MTD\tcontact[" + String(it->first) + "]-name\t" + md.name.toCellString(); + String s = "MTD\tcontact[" + String(it->first) + "]-name\t" + mdc.name.toCellString(); sl.push_back(s); } - if (!md.affiliation.isNull()) + if (!mdc.affiliation.isNull()) { - String s = "MTD\tcontact[" + String(it->first) + "]-affiliation\t" + md.affiliation.toCellString(); + String s = "MTD\tcontact[" + String(it->first) + "]-affiliation\t" + mdc.affiliation.toCellString(); sl.push_back(s); } - if (!md.email.isNull()) + if (!mdc.email.isNull()) { - String s = "MTD\tcontact[" + String(it->first) + "]-email\t" + md.email.toCellString(); + String s = "MTD\tcontact[" + String(it->first) + "]-email\t" + mdc.email.toCellString(); sl.push_back(s); } } @@ -1694,10 +1694,10 @@ namespace OpenMS for (map::const_iterator it = md.fixed_mod.begin(); it != md.fixed_mod.end(); ++it) { - const MzTabModificationMetaData & md = it->second; - if (!md.modification.isNull()) + const MzTabModificationMetaData & mod_md = it->second; + if (!mod_md.modification.isNull()) { - String s = "MTD\tfixed_mod[" + String(it->first) + String("]\t")+ 
md.modification.toCellString(); + String s = "MTD\tfixed_mod[" + String(it->first) + String("]\t")+ mod_md.modification.toCellString(); sl.push_back(s); } else @@ -1705,25 +1705,25 @@ namespace OpenMS //TODO: add CV for no fixed modification searched when it is available } - if (!md.site.isNull()) + if (!mod_md.site.isNull()) { - String s = "MTD\tfixed_mod[" + String(it->first) + String("]-site\t") + md.site.toCellString(); + String s = "MTD\tfixed_mod[" + String(it->first) + String("]-site\t") + mod_md.site.toCellString(); sl.push_back(s); } - if (!md.position.isNull()) + if (!mod_md.position.isNull()) { - String s = "MTD\tfixed_mod[" + String(it->first) + String("]-position\t") + md.position.toCellString(); + String s = "MTD\tfixed_mod[" + String(it->first) + String("]-position\t") + mod_md.position.toCellString(); sl.push_back(s); } } for (map::const_iterator it = md.variable_mod.begin(); it != md.variable_mod.end(); ++it) { - const MzTabModificationMetaData & md = it->second; - if (!md.modification.isNull()) + const MzTabModificationMetaData & mod_md = it->second; + if (!mod_md.modification.isNull()) { - String s = "MTD\tvariable_mod[" + String(it->first) + String("]\t")+ it->second.modification.toCellString(); + String s = "MTD\tvariable_mod[" + String(it->first) + String("]\t") + mod_md.modification.toCellString(); sl.push_back(s); } else @@ -1731,15 +1731,15 @@ namespace OpenMS //TODO: add CV for no variable modification searched when it is available } - if (!md.site.isNull()) + if (!mod_md.site.isNull()) { - String s = "MTD\tvariable_mod[" + String(it->first) + String("]-site\t")+ it->second.site.toCellString(); + String s = "MTD\tvariable_mod[" + String(it->first) + String("]-site\t") + mod_md.site.toCellString(); sl.push_back(s); } - if (!md.position.isNull()) + if (!mod_md.position.isNull()) { - String s = "MTD\tvariable_mod[" + String(it->first) + String("]-position\t")+ it->second.position.toCellString(); + String s = "MTD\tvariable_mod[" + 
String(it->first) + String("]-position\t")+ mod_md.position.toCellString(); sl.push_back(s); } } diff --git a/src/openms/source/FORMAT/ParamCTDFile.cpp b/src/openms/source/FORMAT/ParamCTDFile.cpp index 101cfb5d564..5b8a26577ce 100644 --- a/src/openms/source/FORMAT/ParamCTDFile.cpp +++ b/src/openms/source/FORMAT/ParamCTDFile.cpp @@ -7,11 +7,15 @@ // -------------------------------------------------------------------------- #include + +#include + #include #include #include #include + namespace OpenMS { void ParamCTDFile::store(const std::string& filename, const Param& param, const ToolInfo& tool_info) const @@ -44,7 +48,7 @@ namespace OpenMS // write ctd specific stuff os << "\n"; - os << R"(\n"; os << "\n"; os << "\n"; @@ -107,20 +111,25 @@ namespace OpenMS os << param_it->value.toString() << R"(" type="double")"; break; case ParamValue::STRING_VALUE: - if (tag_list.find("input file") != tag_list.end()) + if (tag_list.find(TOPPBase::TAG_INPUT_FILE) != tag_list.end()) { os << escapeXML(param_it->value.toString()) << R"(" type="input-file")"; - tag_list.erase("input file"); + tag_list.erase(TOPPBase::TAG_INPUT_FILE); } - else if (tag_list.find("output file") != tag_list.end()) + else if (tag_list.find(TOPPBase::TAG_OUTPUT_FILE) != tag_list.end()) { os << escapeXML(param_it->value.toString()) << R"(" type="output-file")"; - tag_list.erase("output file"); + tag_list.erase(TOPPBase::TAG_OUTPUT_FILE); + } + else if (tag_list.find(TOPPBase::TAG_OUTPUT_DIR) != tag_list.end()) + { + os << escapeXML(param_it->value.toString()) << R"(" type="output-dir")"; + tag_list.erase(TOPPBase::TAG_OUTPUT_DIR); } - else if (tag_list.find("output prefix") != tag_list.end()) + else if (tag_list.find(TOPPBase::TAG_OUTPUT_PREFIX) != tag_list.end()) { os << escapeXML(param_it->value.toString()) << R"(" type="output-prefix")"; - tag_list.erase("output prefix"); + tag_list.erase(TOPPBase::TAG_OUTPUT_PREFIX); } else if (param_it->valid_strings.size() == 2 && param_it->valid_strings[0] == "true" 
&& param_it->valid_strings[1] == "false" && param_it->value == "false") { @@ -138,15 +147,15 @@ namespace OpenMS } break; case ParamValue::STRING_LIST: - if (tag_list.find("input file") != tag_list.end()) + if (tag_list.find(TOPPBase::TAG_INPUT_FILE) != tag_list.end()) { os << R"(" type="input-file")"; - tag_list.erase("input file"); + tag_list.erase(TOPPBase::TAG_INPUT_FILE); } - else if (tag_list.find("output file") != tag_list.end()) + else if (tag_list.find(TOPPBase::TAG_OUTPUT_FILE) != tag_list.end()) { os << R"(" type="output-file")"; - tag_list.erase("output file"); + tag_list.erase(TOPPBase::TAG_OUTPUT_FILE); } else { diff --git a/src/openms/source/FORMAT/ParamCWLFile.cpp b/src/openms/source/FORMAT/ParamCWLFile.cpp index 52c21a29cbe..e993de41321 100644 --- a/src/openms/source/FORMAT/ParamCWLFile.cpp +++ b/src/openms/source/FORMAT/ParamCWLFile.cpp @@ -5,6 +5,7 @@ // $Authors: Simon Gene Gottlieb $ // -------------------------------------------------------------------------- +#include #include #include #include @@ -103,24 +104,29 @@ namespace OpenMS } } - // converting trags to tdl compatible tags + // converting OpenMS tags to tdl compatible tags std::set tags; for (auto const& t : param_it->tags) { - if (t == "input file") + if (t == TOPPBase::TAG_INPUT_FILE) { tags.insert("file"); } - else if (t == "output file") + else if (t == TOPPBase::TAG_OUTPUT_FILE) { tags.insert("file"); tags.insert("output"); } - else if (t == "output prefix") + else if (t == TOPPBase::TAG_OUTPUT_PREFIX) { tags.insert("output"); tags.insert("prefixed"); } + else if (t == TOPPBase::TAG_OUTPUT_DIR) + { + tags.insert("directory"); + tags.insert("output"); + } else { tags.insert(t); diff --git a/src/openms/source/FORMAT/ParamXMLFile.cpp b/src/openms/source/FORMAT/ParamXMLFile.cpp index dbd9f540442..d3b9eb3e51e 100644 --- a/src/openms/source/FORMAT/ParamXMLFile.cpp +++ b/src/openms/source/FORMAT/ParamXMLFile.cpp @@ -23,7 +23,7 @@ namespace OpenMS } ParamXMLFile::ParamXMLFile() : - 
XMLFile("/SCHEMAS/Param_1_7_0.xsd", "1.7.0") + XMLFile("/SCHEMAS/Param_1_8_0.xsd", "1.8.0") { } @@ -64,7 +64,7 @@ namespace OpenMS os.precision(writtenDigits(0.0)); os << "\n"; - os << "\n"; + os << "\n"; String indentation = " "; Param::ParamIterator it = param.begin(); while (it != param.end()) diff --git a/src/openms/source/FORMAT/PercolatorInfile.cpp b/src/openms/source/FORMAT/PercolatorInfile.cpp index bf80f8b4fa1..76ba01299ed 100644 --- a/src/openms/source/FORMAT/PercolatorInfile.cpp +++ b/src/openms/source/FORMAT/PercolatorInfile.cpp @@ -3,7 +3,7 @@ // // -------------------------------------------------------------------------- // $Maintainer: Timo Sachsenberg $ -// $Authors: Timo Sachsenberg $ +// $Authors: Timo Sachsenberg, Johannes von Kleist $ // -------------------------------------------------------------------------- #include @@ -62,29 +62,105 @@ namespace OpenMS const String& score_name, const StringList& extra_scores, StringList& filenames, - String decoy_prefix) + String decoy_prefix, + double threshold, + bool SageAnnotation) { CsvFile csv(pin_file, '\t'); - StringList header; - csv.getRow(0, header); + + //Sage Variables, initialized in the following block if SageAnnotation is set + map> anno_mapping; + CsvFile tsv; + CsvFile annos; + unordered_map to_idx_t; + + if (SageAnnotation) // Block for special treatment of sage + { + String tsv_file_path = pin_file.substr(0, pin_file.size()-3); + tsv_file_path = tsv_file_path + "tsv"; + tsv = CsvFile(tsv_file_path,'\t'); + + String temp_diff = "results.sage.pin"; + String anno_file_path = pin_file.substr(0, pin_file.size()-temp_diff.length()); + anno_file_path = anno_file_path + "matched_fragments.sage.tsv"; + annos = CsvFile(anno_file_path, '\t'); + //map PSMID to vec of PeakAnnotation + StringList sage_tsv_header; + tsv.getRow(0, sage_tsv_header); + to_idx_t; // map column name to column index, for full .tsv file + { + int idx_t{}; + for (const auto& h : sage_tsv_header) { to_idx_t[h] = idx_t++; } + } 
+ + // processs annotation file + StringList sage_annotation_header; + annos.getRow(0, sage_annotation_header); + unordered_map to_idx_a; // map column name to column index, for full annotation file file + { + int idx_a{}; + for (const auto& h : sage_annotation_header) { to_idx_a[h] = idx_a++; } + } + // map PSMs -> PeakAnnotation vector + auto num_rows = annos.rowCount(); + + for (size_t i = 1; i < num_rows; ++i) + { + StringList row; + annos.getRow(i, row); + + //Check if mapping already has PSM, if it does add + if (anno_mapping.find(row[to_idx_a.at("psm_id")].toInt()) == anno_mapping.end()) + { + //Make a new vector of annotations + PeptideHit::PeakAnnotation peak_temp; + + peak_temp.annotation = row[to_idx_a.at("fragment_type")] + row[to_idx_a.at("fragment_ordinals")]; + peak_temp.charge = row[to_idx_a.at("fragment_charge")].toInt(); + peak_temp.intensity = row[to_idx_a.at("fragment_intensity")].toDouble(); + peak_temp.mz = row[to_idx_a.at("fragment_mz_experimental")].toDouble(); + + vector temp_anno_vec; + temp_anno_vec.push_back(peak_temp); + anno_mapping[ row[to_idx_a.at("psm_id")].toInt() ] = temp_anno_vec; + } + else + { + //Add values to exisiting vector + PeptideHit::PeakAnnotation peak_temp; + + peak_temp.annotation = row[to_idx_a.at("fragment_type")] + row[to_idx_a.at("fragment_ordinals")]; + peak_temp.charge = row[to_idx_a.at("fragment_charge")].toInt(); + peak_temp.intensity = row[to_idx_a.at("fragment_intensity")].toDouble(); + peak_temp.mz = row[to_idx_a.at("fragment_mz_experimental")].toDouble(); + + anno_mapping[ row[to_idx_a.at("psm_id")].toInt() ].push_back(peak_temp); + } + } + } + + StringList pin_header; + + csv.getRow(0, pin_header); unordered_map to_idx; // map column name to column index { int idx{}; - for (const auto& h : header) { to_idx[h] = idx++; } + for (const auto& h : pin_header) { to_idx[h] = idx++; } } + // determine file name column index in percolator in file int file_name_column_index{-1}; - if (auto it = 
std::find(header.begin(), header.end(), "FileName"); it != header.end()) + if (auto it = std::find(pin_header.begin(), pin_header.end(), "FileName"); it != pin_header.end()) { - file_name_column_index = it - header.begin(); + file_name_column_index = it - pin_header.begin(); } - - // get column indices of extra scores - std::set found_extra_scores; // additional (non-main) scores that should be stored in the PeptideHit, order important for comparable idXML + + // determine extra scores and store column indices + std::set found_extra_scores; // additional (non-main) scores that should be stored in the PeptideHit, order important for comparable idXML for (const String& s : extra_scores) { - if (auto it = std::find(header.begin(), header.end(), s); it != header.end()) + if (auto it = std::find(pin_header.begin(), pin_header.end(), s); it != pin_header.end()) { found_extra_scores.insert(s); } @@ -93,7 +169,7 @@ namespace OpenMS OPENMS_LOG_WARN << "Extra score: " << s << " not found in Percolator input file." << endl; } } - + // charge columns are not standardized, so we check for the format and create hash to lookup column name to charge mapping std::regex charge_one_hot_pattern("^charge\\d+$"); std::regex sage_one_hot_pattern("^z=\\d+$"); @@ -104,7 +180,7 @@ namespace OpenMS // The reason is that sage searches always for the charge annotated in the spectrum raw file. Only if the annotation is missing it will search // the suggested charge range. 
bool found_sage_otherz_charge_column{false}; - for (const String& c : header) + for (const String& c : pin_header) { if (std::regex_match(c, charge_one_hot_pattern)) { @@ -136,11 +212,21 @@ namespace OpenMS StringList row; csv.getRow(i, row); - if (row.size() != header.size()) + StringList t_row; + + if (SageAnnotation) + { + tsv.getRow(i, t_row); + // skip if spectrum_q is above threshold + if (t_row[to_idx_t.at("spectrum_q")].toDouble() > threshold ) continue; + } + + if (row.size() != pin_header.size()) { - throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Error: line " + String(i) + " of file '" + pin_file + "' does not have the same number of columns as the header!", String(i)); + throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Error: line " + String(i) + " of file '" + pin_file + "' does not have the same number of columns as the pin_header!", String(i)); } + if (file_name_column_index >= 0) { raw_file_name = row[file_name_column_index]; @@ -150,7 +236,6 @@ namespace OpenMS map_filename_to_idx[raw_file_name] = filenames.size() - 1; } } - // NOTE: In our pin files that we WRITE, SpecID will be filename + vendor spectrum native ID // However, many search engines (e.g. Sage) choose arbitrary IDs, which is unfortunately allowed // by this loosely defined format. 
@@ -159,14 +244,12 @@ namespace OpenMS if (auto it = to_idx.find("ion_mobility"); it != to_idx.end()) { const String& sIM = row[it->second]; - const double IM = sIM.toDouble(); - pids.back().setMetaValue(Constants::UserParam::IM, IM); + const double IM = sIM.toDouble(); + if (!pids.empty()) pids.back().setMetaValue(Constants::UserParam::IM, IM); } - // In theory, this should be an integer, but Sage currently cannot extract the number from all vendor spectrum IDs, // so it writes the full ID as string String sScanNr = row[to_idx.at("ScanNr")]; - if (sSpecId != spec_id) { pids.resize(pids.size() + 1); @@ -174,13 +257,12 @@ namespace OpenMS pids.back().setScoreType(score_name); pids.back().setMetaValue(Constants::UserParam::ID_MERGE_INDEX, map_filename_to_idx.at(raw_file_name)); pids.back().setRT(row[to_idx.at("retentiontime")].toDouble() * 60.0); // search engines typically write minutes (e.g., sage) - pids.back().setMetaValue("PinSpecId", sSpecId); + pids.back().setMetaValue("PinSpecId", sSpecId); // Since ScanNr is the closest to help in identifying the spectrum in the file later on, // we use it as spectrum_reference. Since it can be integer only or the complete // vendor ID, you will need a lookup in case of number only later!! - pids.back().setSpectrumReference(sScanNr); + pids.back().setSpectrumReference(sScanNr); } - String sPeptide = row[to_idx.at("Peptide")]; const double score = row[to_idx.at(score_name)].toDouble(); String target_decoy = row[to_idx.at("Label")].toInt() == 1 ? 
"target" : "decoy"; @@ -253,12 +335,33 @@ namespace OpenMS AASequence aa_seq = AASequence::fromString(sPeptide); PeptideHit ph(score, rank, charge, std::move(aa_seq)); ph.setMetaValue("target_decoy", target_decoy); + for (const auto& name : found_extra_scores) { ph.setMetaValue(name, row[to_idx.at(name)]); } ph.setRank(rank); + // adding own meta values + if (SageAnnotation) + { + ph.setMetaValue("spectrum_q", t_row[to_idx_t.at("spectrum_q")].toDouble()); //TODO: check if column exists / SAGE specific treatment + } + ph.setMetaValue("DeltaMass", ( row[to_idx.at("ExpMass")].toDouble() - row[to_idx.at("CalcMass")].toDouble()) ); + // add annotations + if (SageAnnotation) + { + if (anno_mapping.find(sSpecId.toInt()) != anno_mapping.end()) + { + // copy annotations from mapping to PeptideHit + vector pep_vec; + for (const PeptideHit::PeakAnnotation& pep : anno_mapping[sSpecId.toInt()]) + { + pep_vec.push_back(pep) ; + } + ph.setPeakAnnotations(pep_vec); + } + } // add link to protein (we only know the accession but not start/end, aa_before/after in protein at this point) for (const String& accession : accessions) { @@ -267,6 +370,7 @@ namespace OpenMS pids.back().insertHit(std::move(ph)); } + return pids; } @@ -542,4 +646,4 @@ namespace OpenMS return count; } -} +} \ No newline at end of file diff --git a/src/openms/source/FORMAT/SwathFile.cpp b/src/openms/source/FORMAT/SwathFile.cpp index 0c5c41e35d5..6ce7016a6d1 100644 --- a/src/openms/source/FORMAT/SwathFile.cpp +++ b/src/openms/source/FORMAT/SwathFile.cpp @@ -309,7 +309,8 @@ namespace OpenMS /// Counts the number of scans in a full Swath file (e.g. 
concatenated non-split file) void SwathFile::countScansInSwath_(const std::vector& exp, std::vector& swath_counter, int& nr_ms1_spectra, - std::vector& known_window_boundaries) + std::vector& known_window_boundaries, + double TOLERANCE) { int ms1_counter = 0; for (Size i = 0; i < exp.size(); i++) @@ -328,28 +329,32 @@ namespace OpenMS "Found SWATH scan (MS level 2 scan) without a precursor. Cannot determine SWATH window."); } const std::vector prec = s.getPrecursors(); - double center = prec[0].getMZ(); - - - // check if ion mobility is present - double lowerIm = -1; - double upperIm = -1; // these initial values assume ion mobility is not present + // set ion mobility if exists, otherwise will take default value of -1 + double imLower, imUpper; if (s.metaValueExists("ion mobility lower limit")) { - lowerIm = s.getMetaValue("ion mobility lower limit"); // want this to be -1 if no ion mobility - upperIm = s.getMetaValue("ion mobility upper limit"); + imLower = s.getMetaValue("ion mobility lower limit"); // want this to be -1 if no ion mobility + imUpper = s.getMetaValue("ion mobility upper limit"); } + else + { + imLower = -1; + imUpper = -1; + } + const OpenSwath::SwathMap boundary(prec[0].getMZ() - prec[0].getIsolationWindowLowerOffset(), + prec[0].getMZ() + prec[0].getIsolationWindowUpperOffset(), + prec[0].getMZ(), + imLower, + imUpper, + false); bool found = false; - for (Size j = 0; j < known_window_boundaries.size(); j++) { - // We group by the precursor mz (center of the window) since this - // should be present - // for ion mobility, since the center value is not present in the raw data (it is computed) we use the imLower and upper bounds - if ((std::fabs(center - known_window_boundaries[j].center) < 1e-6) && (std::fabs(lowerIm - known_window_boundaries[j].imLower) < 1e-6) && (std::fabs(upperIm - known_window_boundaries[j].imUpper < 1e-6))) + // Check if the current scan is within the known window boundaries + if 
(known_window_boundaries[j].isEqual(boundary, TOLERANCE)) { found = true; swath_counter[j]++; @@ -359,23 +364,11 @@ namespace OpenMS { // we found a new SWATH scan swath_counter.push_back(1); - double lower = prec[0].getMZ() - prec[0].getIsolationWindowLowerOffset(); - double upper = prec[0].getMZ() + prec[0].getIsolationWindowUpperOffset(); - - OpenSwath::SwathMap boundary; - boundary.lower = lower; - boundary.upper = upper; - boundary.center = center; - - // set IM boundaries (if present) - boundary.imLower = lowerIm; - boundary.imUpper = upperIm; - known_window_boundaries.push_back(boundary); - OPENMS_LOG_DEBUG << "Adding Swath centered at " << center - << " m/z with an isolation window of " << lower << " to " << upper - << " m/z and start of " << lowerIm << " and IM end of " << upperIm << std::endl; + OPENMS_LOG_DEBUG << "Adding Swath centered at " << boundary.center + << " m/z with an isolation window of " << boundary.lower << " to " << boundary.upper + << " m/z and IM start of " << boundary.imLower << " and IM end of " << boundary.imUpper << std::endl; } } } diff --git a/src/openms/source/KERNEL/MSSpectrum.cpp b/src/openms/source/KERNEL/MSSpectrum.cpp index 45bd9d6f2d5..0b3bfc40819 100644 --- a/src/openms/source/KERNEL/MSSpectrum.cpp +++ b/src/openms/source/KERNEL/MSSpectrum.cpp @@ -510,19 +510,12 @@ namespace OpenMS return *this; } - MSSpectrum::MSSpectrum() : - ContainerType(), - RangeManagerContainerType(), - SpectrumSettings(), - retention_time_(-1), - drift_time_(-1), - drift_time_unit_(DriftTimeUnit::NONE), - ms_level_(1), - name_(), - float_data_arrays_(), - string_data_arrays_(), - integer_data_arrays_() - {} + MSSpectrum::MSSpectrum() = default; + + MSSpectrum::MSSpectrum(const std::initializer_list& init) + : ContainerType(init) + { + } MSSpectrum::MSSpectrum(const MSSpectrum &source) = default; diff --git a/src/openms/source/METADATA/SpectrumLookup.cpp b/src/openms/source/METADATA/SpectrumLookup.cpp index c4bfdf5f178..732dd2915ad 100644 --- 
a/src/openms/source/METADATA/SpectrumLookup.cpp +++ b/src/openms/source/METADATA/SpectrumLookup.cpp @@ -318,11 +318,18 @@ namespace OpenMS boost::sregex_token_iterator current_begin(native_id.begin(), native_id.end(), regexp, subgroups); boost::sregex_token_iterator current_end(native_id.end(), native_id.end(), regexp, subgroups); matches.insert(matches.end(), current_begin, current_end); - if (matches.size() == 1) // default case: one native identifier + + if (matches.size() < subgroups.size()) { + OPENMS_LOG_WARN << "native_id '" << native_id <<"' is invalid. Could not extract scan number." << std::endl; + return -1; + } + + if (subgroups.size() == 1) // default case: one native identifier { try { - String value = String(matches[0]); + // In case of merged spectra the last native id matches the scan number of the merged scan. + String value = String(matches[matches.size() - 1]); if (native_id_type_accession == "MS:1000774") { return value.toInt() + 1; // if the native ID is index=.., the scan number is usually considered index+1 (especially for pepXML) @@ -334,27 +341,31 @@ namespace OpenMS } catch (Exception::ConversionError&) { - OPENMS_LOG_WARN << "Value: '" << String(matches[0]) << "' could not be converted to int in string. Native ID='" << native_id << "'" << std::endl; + OPENMS_LOG_WARN << "Value: '" << String(matches[matches.size() - 1]) << "' could not be converted to int in string. Native ID='" << native_id << "'" << std::endl; return -1; } } - else if (matches.size() == 2) // special case: wiff file with two native identifiers + else if (subgroups.size() == 2) // special case: wiff file with two native identifiers { try { - if (String(matches[1]).toInt() < 1000) // checks if value of experiment is smaller than 1000 (cycle * 1000 + experiment) + // In case of merged spectra the last native id matches the scan number of the merged scan. 
+ String cycle_str = matches[matches.size() - 2]; + String experiment_str = matches[matches.size() - 1]; + + if (experiment_str.toInt() < 1000) // checks if value of experiment is smaller than 1000 (cycle * 1000 + experiment) { - int value = String(matches[0]).toInt() * 1000 + String(matches[1]).toInt(); + int value = cycle_str.toInt() * 1000 + experiment_str.toInt(); return value; } else { - throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The value of experiment is too large and can not be handled properly.", String(matches[1])); + throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The value of experiment is too large and can not be handled properly.", experiment_str); } } catch (Exception::ConversionError&) { - OPENMS_LOG_WARN << "Value: '" << String(matches[0]) << "' could not be converted to int in string. Native ID='" + OPENMS_LOG_WARN << "Values: '" << matches[matches.size() - 2] << "', '" << matches[matches.size() - 1] << "' could not be converted to int in string. 
Native ID='" << native_id << "' accession='" << native_id_type_accession << "'" << std::endl; return -1; } diff --git a/src/openms/source/SYSTEM/File.cpp b/src/openms/source/SYSTEM/File.cpp index 2bb1811757c..ce4d97d019e 100644 --- a/src/openms/source/SYSTEM/File.cpp +++ b/src/openms/source/SYSTEM/File.cpp @@ -244,6 +244,11 @@ namespace OpenMS return true; } + bool File::copy(const String& from, const String& to) + { + return QFile::copy(from.toQString(), to.toQString()); + } + bool File::remove(const String& file) { if (!exists(file)) @@ -284,6 +289,12 @@ namespace OpenMS return result; } + bool File::makeDir(const String& dir_name) + { + QDir dir; + return dir.mkpath(dir_name.toQString()); + } + bool File::removeDirRecursively(const String& dir_name) { bool fail = false; diff --git a/src/openms_gui/include/OpenMS/VISUAL/APPLICATIONS/TOPPViewBase.h b/src/openms_gui/include/OpenMS/VISUAL/APPLICATIONS/TOPPViewBase.h index a4b3c2c2be1..740bc006c0d 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/APPLICATIONS/TOPPViewBase.h +++ b/src/openms_gui/include/OpenMS/VISUAL/APPLICATIONS/TOPPViewBase.h @@ -187,10 +187,10 @@ namespace OpenMS @param data_type Type of the data @param show_as_1d Force dataset to be opened in 1D mode (even if it contains several spectra) @param show_options If the options dialog should be shown (otherwise the defaults are used) - @param as_new_window Open the layer in a new window within TOPPView + @param as_new_window Open the layer in a new window within TOPPView (ignored if 'window_id' is set) @param filename source file name (if the data came from a file) @param caption Sets the layer name and window caption of the data. If unset the file name is used. If set, the file is not monitored for changes. - @param window_id in which window the file is opened if opened as a new layer (0 or default equals current + @param window_id in which window the file is opened if opened as a new layer (0 will open a new window). 
@param spectrum_id determines the spectrum to show in 1D view. */ void addData(const FeatureMapSharedPtrType& feature_map, diff --git a/src/openms_gui/include/OpenMS/VISUAL/DIALOGS/TOPPASIOMappingDialog.h b/src/openms_gui/include/OpenMS/VISUAL/DIALOGS/TOPPASIOMappingDialog.h index 4654dd35505..991d083d998 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/DIALOGS/TOPPASIOMappingDialog.h +++ b/src/openms_gui/include/OpenMS/VISUAL/DIALOGS/TOPPASIOMappingDialog.h @@ -11,8 +11,6 @@ // OpenMS_GUI config #include -#include - #include #include @@ -59,9 +57,6 @@ public slots: /// The edge we are configuring TOPPASEdge * edge_; - /// Vector storing the mapping of the target input combobox indices to param indices of edges - QVector target_input_param_indices_; - protected slots: /// Called when OK is pressed; checks if the selected parameters are valid diff --git a/src/openms_gui/include/OpenMS/VISUAL/ParamEditor.h b/src/openms_gui/include/OpenMS/VISUAL/ParamEditor.h index c22b1f038cf..8984bf560f2 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/ParamEditor.h +++ b/src/openms_gui/include/OpenMS/VISUAL/ParamEditor.h @@ -107,6 +107,8 @@ private slots: ParamEditorDelegate(); /// used to modify value of output and input files( not for output and input lists) mutable QString fileName_; + /// holds a directory name (for output directories) + mutable QString dirName_; /// true if a QLineEdit is still open and has not committed its data yet (so storing the current param is a bad idea) mutable bool has_uncommited_data_; }; @@ -118,13 +120,13 @@ private slots: Q_OBJECT public: - ///Constructor + /// Constructor ParamTree(QWidget * parent); /// Overloaded edit method to activate F2 use bool edit(const QModelIndex & index, EditTrigger trigger, QEvent * event) override; signals: - ///Signal that is emitted when a new item is selected + /// Signal that is emitted when a new item is selected void selected(const QModelIndex & index); protected slots: diff --git 
a/src/openms_gui/include/OpenMS/VISUAL/Plot1DCanvas.h b/src/openms_gui/include/OpenMS/VISUAL/Plot1DCanvas.h index 9cc5f9c8dc6..b847e0535ef 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/Plot1DCanvas.h +++ b/src/openms_gui/include/OpenMS/VISUAL/Plot1DCanvas.h @@ -418,6 +418,12 @@ namespace OpenMS return point; } + /// overload to call the 1D version (which has min-intensity of '0') + virtual const RangeType& getDataRange() const override + { + return overall_data_range_1d_; + } + /** * \brief Pushes a data point back into the valid data range of the current layer area. Useful for annotation items which were mouse-dragged outside the range by the user. * \tparam T A data point, e.g. Peak1D, which may be outside the data area @@ -581,7 +587,7 @@ protected slots: void drawAlignment_(QPainter& painter); /// internal method, called before calling parent function PlotCanvas::changeVisibleArea_ - void changeVisibleAreaCommon_(const UnitRange& new_area, bool repaint, bool add_to_stack); + void changeVisibleArea1D_(const UnitRange& new_area, bool repaint, bool add_to_stack); // Docu in base class void changeVisibleArea_(VisibleArea new_area, bool repaint = true, bool add_to_stack = false) override; diff --git a/src/openms_gui/include/OpenMS/VISUAL/PlotCanvas.h b/src/openms_gui/include/OpenMS/VISUAL/PlotCanvas.h index 70b31ab472d..729f4c76081 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/PlotCanvas.h +++ b/src/openms_gui/include/OpenMS/VISUAL/PlotCanvas.h @@ -397,11 +397,16 @@ namespace OpenMS @param map Shared pointer to input map. It can be performed in constant time and does not double the required memory. @param od_map Shared pointer to on disk data which potentially caches some data to save memory (the map can be empty, but do not pass nullptr). 
@param filename This @em absolute filename is used to monitor changes in the file and reload the data + @param caption The caption of the layer (shown in the layer window) @param use_noise_cutoff Add a noise filter which removes low-intensity peaks @return If a new layer was created */ - bool addPeakLayer(const ExperimentSharedPtrType& map, ODExperimentSharedPtrType od_map, const String& filename = "", const bool use_noise_cutoff = false); + bool addPeakLayer(const ExperimentSharedPtrType& map, + ODExperimentSharedPtrType od_map, + const String& filename = "", + const String& caption = "", + const bool use_noise_cutoff = false); /** @brief Add a chrom data layer @@ -409,10 +414,11 @@ namespace OpenMS @param map Shared pointer to input map. It can be performed in constant time and does not double the required memory. @param od_map Shared pointer to on disk data which potentially caches some data to save memory (the map can be empty, but do not pass nullptr). @param filename This @em absolute filename is used to monitor changes in the file and reload the data + @param caption The caption of the layer (shown in the layer window) @return If a new layer was created */ - bool addChromLayer(const ExperimentSharedPtrType& map, ODExperimentSharedPtrType od_map, const String& filename = ""); + bool addChromLayer(const ExperimentSharedPtrType& map, ODExperimentSharedPtrType od_map, const String& filename = "", const String& caption = ""); /** @@ -420,32 +426,36 @@ namespace OpenMS @param map Shared Pointer to input map. It can be performed in constant time and does not double the required memory. 
@param filename This @em absolute filename is used to monitor changes in the file and reload the data + @param caption The caption of the layer (shown in the layer window) @return If a new layer was created */ - bool addLayer(FeatureMapSharedPtrType map, const String& filename = ""); + bool addLayer(FeatureMapSharedPtrType map, const String& filename = "", const String& caption = ""); /** @brief Add a consensus feature data layer @param map Shared Pointer to input map. It can be performed in constant time and does not double the required memory. @param filename This @em absolute filename is used to monitor changes in the file and reload the data + @param caption The caption of the layer (shown in the layer window) @return If a new layer was created */ - bool addLayer(ConsensusMapSharedPtrType map, const String& filename = ""); + bool addLayer(ConsensusMapSharedPtrType map, const String& filename = "", const String& caption = ""); //@} /** @brief Add an identification data layer - @param peptides Input list of peptides, which has to be mutable and will be empty after adding. Swapping is used to insert the data. It can be performed in constant time and does not double - the required memory. + @param peptides Input list of peptides, which has to be mutable and will be empty after adding. + Swapping is used to insert the data. It can be performed in constant time and does not double + the required memory. 
@param filename This @em absolute filename is used to monitor changes in the file and reload the data + @param caption The caption of the layer (shown in the layer window) @return If a new layer was created */ - bool addLayer(std::vector& peptides, const String& filename = ""); + bool addLayer(std::vector& peptides, const String& filename = "", const String& caption = ""); /// Returns the minimum intensity of the active layer inline float getCurrentMinIntensity() const @@ -489,7 +499,7 @@ namespace OpenMS @see overall_data_range_ */ - const RangeType& getDataRange() const; + virtual const RangeType& getDataRange() const; /** @brief Returns the first intensity scaling factor for 'snap to maximum intensity mode' (for the currently visible data range). diff --git a/src/openms_gui/include/OpenMS/VISUAL/TOPPASInputFileListVertex.h b/src/openms_gui/include/OpenMS/VISUAL/TOPPASInputFileListVertex.h index af83d5cbb45..54ef3572d8a 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/TOPPASInputFileListVertex.h +++ b/src/openms_gui/include/OpenMS/VISUAL/TOPPASInputFileListVertex.h @@ -20,43 +20,44 @@ namespace OpenMS @ingroup TOPPAS_elements */ - class OPENMS_GUI_DLLAPI TOPPASInputFileListVertex : - public TOPPASVertex - { - Q_OBJECT +class OPENMS_GUI_DLLAPI TOPPASInputFileListVertex : public TOPPASVertex +{ + Q_OBJECT public: + /// Default constructor + TOPPASInputFileListVertex() = default; + /// Constructor + TOPPASInputFileListVertex(const QStringList& files); + /// Copy constructor + TOPPASInputFileListVertex(const TOPPASInputFileListVertex& rhs) = default; + /// Destructor + ~TOPPASInputFileListVertex() override = default; + /// Assignment operator + TOPPASInputFileListVertex& operator=(const TOPPASInputFileListVertex& rhs) = default; + + virtual std::unique_ptr clone() const override; - /// Default constructor - TOPPASInputFileListVertex() = default; - /// Constructor - TOPPASInputFileListVertex(const QStringList& files); - /// Copy constructor - 
TOPPASInputFileListVertex(const TOPPASInputFileListVertex& rhs) = default; - /// Destructor - ~TOPPASInputFileListVertex() override = default; - /// Assignment operator - TOPPASInputFileListVertex & operator=(const TOPPASInputFileListVertex & rhs) = default; - /// returns "InputVertex" - String getName() const override; - /// Sets the list of files - void setFilenames(const QStringList & files); - /// Starts all tools below this node - void run() override; - // documented in base class - void paint(QPainter * painter, const QStyleOptionGraphicsItem * option, QWidget * widget) override; - // documented in base class - QRectF boundingRect() const override; - /// Checks if the given list of file names is valid - bool fileNamesValid(); - /// Shows the dialog for editing the files - void showFilesDialog(); - /// Opens the folders of the input files - void openContainingFolder(); - /// Returns the key (for applying resources from a resource file) - const QString & getKey(); - /// Sets the key (for applying resources from a resource file) - void setKey(const QString & key); + /// returns "InputVertex" + String getName() const override; + /// Sets the list of files + void setFilenames(const QStringList & files); + /// Starts all tools below this node + void run() override; + // documented in base class + void paint(QPainter * painter, const QStyleOptionGraphicsItem * option, QWidget * widget) override; + // documented in base class + QRectF boundingRect() const override; + /// Checks if the given list of file names is valid + bool fileNamesValid(); + /// Shows the dialog for editing the files + void showFilesDialog(); + /// Opens the folders of the input files + void openContainingFolder(); + /// Returns the key (for applying resources from a resource file) + const QString & getKey(); + /// Sets the key (for applying resources from a resource file) + void setKey(const QString & key); public slots: /// Called by an outgoing edge when it has changed diff --git 
a/src/openms_gui/include/OpenMS/VISUAL/TOPPASMergerVertex.h b/src/openms_gui/include/OpenMS/VISUAL/TOPPASMergerVertex.h index 58340476a62..774d299442a 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/TOPPASMergerVertex.h +++ b/src/openms_gui/include/OpenMS/VISUAL/TOPPASMergerVertex.h @@ -29,35 +29,36 @@ namespace OpenMS @ingroup TOPPAS_elements */ - class OPENMS_GUI_DLLAPI TOPPASMergerVertex : - public TOPPASVertex - { - Q_OBJECT +class OPENMS_GUI_DLLAPI TOPPASMergerVertex : public TOPPASVertex +{ + Q_OBJECT public: + /// Default constructor + TOPPASMergerVertex() = default; + /// Constructor + TOPPASMergerVertex(bool round_based); + /// Copy constructor + TOPPASMergerVertex(const TOPPASMergerVertex& rhs) = default; + /// Destructor + ~TOPPASMergerVertex() override = default; + /// Assignment operator + TOPPASMergerVertex& operator=(const TOPPASMergerVertex& rhs) = default; + + virtual std::unique_ptr clone() const override; - /// Default constructor - TOPPASMergerVertex() = default; - /// Constructor - TOPPASMergerVertex(bool round_based); - /// Copy constructor - TOPPASMergerVertex(const TOPPASMergerVertex& rhs) = default; - /// Destructor - ~TOPPASMergerVertex() override = default; - /// Assignment operator - TOPPASMergerVertex& operator=(const TOPPASMergerVertex& rhs) = default; - /// returns "MergerVertex" - String getName() const override; - /// check if upstream nodes are finished and call downstream nodes - void run() override; - /// Determines whether this merger is merging round based or merging all inputs into one list - bool roundBasedMode() const; - // documented in base class - void paint(QPainter* painter, const QStyleOptionGraphicsItem* option, QWidget* widget) override; - // documented in base class - QRectF boundingRect() const override; - // documented in base class - void markUnreachable() override; + /// returns "MergerVertex" + String getName() const override; + /// check if upstream nodes are finished and call downstream nodes + void run() 
override; + /// Determines whether this merger is merging round based or merging all inputs into one list + bool roundBasedMode() const; + // documented in base class + void paint(QPainter* painter, const QStyleOptionGraphicsItem* option, QWidget* widget) override; + // documented in base class + QRectF boundingRect() const override; + // documented in base class + void markUnreachable() override; public slots: diff --git a/src/openms_gui/include/OpenMS/VISUAL/TOPPASOutputFileListVertex.h b/src/openms_gui/include/OpenMS/VISUAL/TOPPASOutputFileListVertex.h index 0e177462885..704315b8b41 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/TOPPASOutputFileListVertex.h +++ b/src/openms_gui/include/OpenMS/VISUAL/TOPPASOutputFileListVertex.h @@ -11,7 +11,7 @@ // OpenMS_GUI config #include -#include +#include namespace OpenMS { @@ -21,69 +21,22 @@ namespace OpenMS @ingroup TOPPAS_elements */ class OPENMS_GUI_DLLAPI TOPPASOutputFileListVertex : - public TOPPASVertex + public TOPPASOutputVertex { Q_OBJECT public: - - /// Default constructor - TOPPASOutputFileListVertex() = default; - /// Copy constructor - TOPPASOutputFileListVertex(const TOPPASOutputFileListVertex & rhs); - /// Destructor - ~TOPPASOutputFileListVertex() override = default; - /// Assignment operator - TOPPASOutputFileListVertex & operator=(const TOPPASOutputFileListVertex & rhs); - /// returns "OutputVertex" + virtual std::unique_ptr clone() const override; + /// returns "OutputFileVertex" String getName() const override; // documented in base class void paint(QPainter * painter, const QStyleOptionGraphicsItem * option, QWidget * widget) override; // documented in base class QRectF boundingRect() const override; - // documented in base class - void reset(bool reset_all_files = false) override; - /// opens the folder containing the output data - void mouseDoubleClickEvent(QGraphicsSceneMouseEvent*) override; + /// Called when the parent node has finished execution void run() override; - /// Returns the full 
directory (including preceding output path as selected by user) - String getFullOutputDirectory() const; - /// Returns the directory where the output files are stored - String getOutputDir() const; - /// Creates the output directory for this node - String createOutputDir() const; - /// Sets the topological sort number and removes invalidated tmp files - void setTopoNr(UInt nr) override; - /// Opens the folders of the output files - void openContainingFolder() const; - /// Sets a custom output folder name, which will be integrated into 'getOutputDir()' and 'getFullOutputDirectory()' calls. - /// @note The string is not checked for validity (avoid characters which are not allowed in directories, e.g. '{') - void setOutputFolderName(const QString& name); - /// return the output folder where results are written - const QString& getOutputFolderName() const; - -public slots: - - //documented in base class - void inEdgeHasChanged() override; - -signals: - /// Emitted when an output file was written - void outputFileWritten(const String& file); - - /// Emitted when user has changed the output folder name (i.e. output dir needs to be newly created and packages updates) - void outputFolderNameChanged(); - -protected: - - // custom output folder name - QString output_folder_name_; - static bool copy_(const QString & from, const QString & to); ///< STATIC(!) 
function which calls QFile::copy(); needs to be static, since we need to pass a function pointer (which does not work on member functions) - // convenience members, not required for operation, but for progress during copying - int files_written_ = 0; ///< files that were already written - int files_total_ = 0; ///< total number of files from upstream }; } diff --git a/src/openms_gui/include/OpenMS/VISUAL/TOPPASOutputFolderVertex.h b/src/openms_gui/include/OpenMS/VISUAL/TOPPASOutputFolderVertex.h new file mode 100644 index 00000000000..4d6ae980e11 --- /dev/null +++ b/src/openms_gui/include/OpenMS/VISUAL/TOPPASOutputFolderVertex.h @@ -0,0 +1,40 @@ +// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Chris Bielow $ +// $Authors: Chris Bielow $ +// -------------------------------------------------------------------------- + +#pragma once + +// OpenMS_GUI config +#include + +#include + +namespace OpenMS +{ + /** + @brief A vertex representing an output folder + + @ingroup TOPPAS_elements + */ + class OPENMS_GUI_DLLAPI TOPPASOutputFolderVertex : + public TOPPASOutputVertex + { + Q_OBJECT + +public: + virtual std::unique_ptr clone() const override; + /// returns "OutputFolderVertex" + String getName() const override; + // documented in base class + void paint(QPainter * painter, const QStyleOptionGraphicsItem * option, QWidget * widget) override; + // documented in base class + QRectF boundingRect() const override; + + /// Called when the parent node has finished execution + void run() override; + }; +} //namespace OpenMS diff --git a/src/openms_gui/include/OpenMS/VISUAL/TOPPASOutputVertex.h b/src/openms_gui/include/OpenMS/VISUAL/TOPPASOutputVertex.h new file mode 100644 index 00000000000..03aedcd1d9b --- /dev/null +++ b/src/openms_gui/include/OpenMS/VISUAL/TOPPASOutputVertex.h @@ -0,0 +1,75 @@ 
+// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Maintainer: Chris Bielow $ +// $Authors: Chris Bielow $ +// -------------------------------------------------------------------------- + +#pragma once + +// OpenMS_GUI config +#include + +#include + +namespace OpenMS +{ + /** + @brief A vertex representing an output, either folder or files(s) + + @ingroup TOPPAS_elements + */ + class OPENMS_GUI_DLLAPI TOPPASOutputVertex : public TOPPASVertex + { + Q_OBJECT + public: + /// Default C'tor + TOPPASOutputVertex() = default; + /// Copy constructor + TOPPASOutputVertex(const TOPPASOutputVertex& rhs); + /// Assignment operator + TOPPASOutputVertex& operator=(const TOPPASOutputVertex& rhs); + + // documented in base class + void reset(bool reset_all_files = false) override; + /// opens the folder containing the output data + void mouseDoubleClickEvent(QGraphicsSceneMouseEvent*) override; + /// Returns the full directory (including preceding output path as selected by user and a trailing '/') + String getFullOutputDirectory() const; + /// Returns the directory where the output files are stored (includes a trailing '/') + String getOutputDir() const; + /// Creates the output directory for this node (includes a trailing '/') + String createOutputDir() const; + /// Sets the topological sort number and removes invalidated tmp files + void setTopoNr(UInt nr) override; + /// Opens the folders of the output files + void openContainingFolder() const; + /// Sets a custom output folder name, which will be integrated into 'getOutputDir()' and 'getFullOutputDirectory()' calls. + /// @note The string is not checked for validity (avoid characters which are not allowed in directories, e.g. 
'{') + void setOutputFolderName(const QString& name); + /// return the output folder where results are written + const QString& getOutputFolderName() const; + + signals: + /// Emitted when an output file was written + void outputFileWritten(const String& file); + + /// Emitted when user has changed the output folder name (i.e. output dir needs to be newly created and packages updates) + void outputFolderNameChanged(); + + public slots: + // documented in base class + void inEdgeHasChanged() override; + + protected: + /// custom output folder name + QString output_folder_name_; + + // convenience members, not required for operation, but for progress during copying + int files_written_ = 0; ///< files that were already written + int files_total_ = 0; ///< total number of files from upstream + }; + +} // namespace OpenMS + diff --git a/src/openms_gui/include/OpenMS/VISUAL/TOPPASScene.h b/src/openms_gui/include/OpenMS/VISUAL/TOPPASScene.h index 4dc1d07cc07..f7f5d7ae722 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/TOPPASScene.h +++ b/src/openms_gui/include/OpenMS/VISUAL/TOPPASScene.h @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -191,7 +192,7 @@ namespace OpenMS ///Connects the signals to slots void connectToolVertexSignals(TOPPASToolVertex * ttv); ///Connects the signals to slots - void connectOutputVertexSignals(TOPPASOutputFileListVertex * oflv); + void connectOutputVertexSignals(TOPPASOutputVertex * oflv); ///Connects the signals to slots void connectMergerVertexSignals(TOPPASMergerVertex * tmv); ///Connects the signals to slots diff --git a/src/openms_gui/include/OpenMS/VISUAL/TOPPASSplitterVertex.h b/src/openms_gui/include/OpenMS/VISUAL/TOPPASSplitterVertex.h index d29bbfc2a50..7b0ad307b61 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/TOPPASSplitterVertex.h +++ b/src/openms_gui/include/OpenMS/VISUAL/TOPPASSplitterVertex.h @@ -39,6 +39,7 @@ namespace OpenMS ~TOPPASSplitterVertex() override = default; /// Assignment operator 
TOPPASSplitterVertex& operator=(const TOPPASSplitterVertex& rhs); + virtual std::unique_ptr clone() const override; /// returns "SplitterVertex" String getName() const override; /// check if upstream nodes are finished and call downstream nodes diff --git a/src/openms_gui/include/OpenMS/VISUAL/TOPPASToolVertex.h b/src/openms_gui/include/OpenMS/VISUAL/TOPPASToolVertex.h index 87514b9f33a..ed4b54a898b 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/TOPPASToolVertex.h +++ b/src/openms_gui/include/OpenMS/VISUAL/TOPPASToolVertex.h @@ -63,7 +63,8 @@ namespace OpenMS enum IOType { IOT_FILE, - IOT_LIST + IOT_LIST, + IOT_DIR ///< output directory }; /// Comparison operator @@ -78,6 +79,11 @@ namespace OpenMS return param_name.compare(rhs.param_name) < 0; } } + /// Comparison operator + bool operator==(const IOInfo& rhs) const + { + return type == rhs.type && param_name == rhs.param_name; + } /// Assignment operator IOInfo& operator=(const IOInfo& rhs) @@ -92,10 +98,9 @@ namespace OpenMS /// Is any of the input/output parameters a list? static bool isAnyList(const QVector& params) { - for (QVector::const_iterator it = params.begin(); - it != params.end(); ++it) + for (const auto& p : params) { - if (it->type == IOT_LIST) return true; + if (p.type == IOT_LIST) return true; } return false; } @@ -118,15 +123,17 @@ namespace OpenMS ~TOPPASToolVertex() override = default; /// Assignment operator TOPPASToolVertex& operator=(const TOPPASToolVertex& rhs); + + virtual std::unique_ptr clone() const override; /// returns the name of the TOPP tool String getName() const override; /// Returns the type of the tool const String& getType() const; - /// Fills @p input_infos with the required input file/list parameters together with their valid types. - void getInputParameters(QVector& input_infos) const; - /// Fills @p output_infos with the required output file/list parameters together with their valid types. 
- void getOutputParameters(QVector& output_infos) const; + /// Returns input file/list parameters together with their valid types. + QVector getInputParameters() const; + /// Returns output file/list/dir parameters together with their valid types. + QVector getOutputParameters() const; // documented in base class void paint(QPainter* painter, const QStyleOptionGraphicsItem* option, QWidget* widget) override; // documented in base class @@ -223,8 +230,8 @@ public slots: bool renameOutput_(); /// Initializes the parameters with standard values (from -write_ini), uses the parameters from the old_ini_file if given, returns if parameters have changed (if old_ini_file was given) bool initParam_(const QString& old_ini_file = ""); - /// Fills @p io_infos with the required input/output file/list parameters. If @p input_params is true, input params are returned, otherwise output params. - void getParameters_(QVector& io_infos, bool input_params) const; + /// returns input/output file/list parameters. If @p input_params is true, input params are returned, otherwise output params. + QVector getParameters_(bool input_params) const; /// Writes @p param to the @p ini_file void writeParam_(const Param& param, const QString& ini_file); /// Helper method for finding good boundaries for wrapping the tool name. Returns a string with whitespaces at the preferred boundaries. 
diff --git a/src/openms_gui/include/OpenMS/VISUAL/TOPPASVertex.h b/src/openms_gui/include/OpenMS/VISUAL/TOPPASVertex.h index 898b82cc852..b2bbac038b3 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/TOPPASVertex.h +++ b/src/openms_gui/include/OpenMS/VISUAL/TOPPASVertex.h @@ -131,12 +131,12 @@ namespace OpenMS /// indexing via "parameter_index" of adjacent edge (could later be param_name) -> filenames /// Index for input and output edges is (-1) implicitly, thus we need signed type /// warning: the index refers to either input OR output (depending on if this structure is used for input files storage or output files storage) - typedef std::map RoundPackage; - typedef RoundPackage::const_iterator RoundPackageConstIt; - typedef RoundPackage::iterator RoundPackageIt; + using RoundPackage = std::map; + using RoundPackageConstIt = RoundPackage::const_iterator; + using RoundPackageIt = RoundPackage::iterator; /// all information a node needs to process all rounds - typedef std::vector RoundPackages; + using RoundPackages = std::vector; /// The color of a vertex during depth-first search enum DFS_COLOR @@ -162,6 +162,10 @@ namespace OpenMS ~TOPPASVertex() override = default; /// Assignment operator TOPPASVertex& operator=(const TOPPASVertex & rhs); + + /// Make a copy of this vertex on the heap and return a pointer to it (useful for copying nodes) + virtual std::unique_ptr clone() const = 0; + /// base paint method for all derived classes. 
should be called first in child-class paint void paint(QPainter* painter, const QStyleOptionGraphicsItem* /*option*/, QWidget* /*widget*/, bool round_shape = true); @@ -346,5 +350,6 @@ public slots: } }; -} + +} // namespace OpenMS diff --git a/src/openms_gui/include/OpenMS/VISUAL/sources.cmake b/src/openms_gui/include/OpenMS/VISUAL/sources.cmake index b8e8aad5401..1dcc05237a2 100644 --- a/src/openms_gui/include/OpenMS/VISUAL/sources.cmake +++ b/src/openms_gui/include/OpenMS/VISUAL/sources.cmake @@ -56,7 +56,9 @@ TableView.h TOPPASEdge.h TOPPASInputFileListVertex.h TOPPASMergerVertex.h +TOPPASOutputVertex.h TOPPASOutputFileListVertex.h +TOPPASOutputFolderVertex.h TOPPASResource.h TOPPASResources.h TOPPASScene.h diff --git a/src/openms_gui/source/VISUAL/APPLICATIONS/GUITOOLS/INIFileEditor.cpp b/src/openms_gui/source/VISUAL/APPLICATIONS/GUITOOLS/INIFileEditor.cpp index e3724b59092..d06f3a7e6c3 100644 --- a/src/openms_gui/source/VISUAL/APPLICATIONS/GUITOOLS/INIFileEditor.cpp +++ b/src/openms_gui/source/VISUAL/APPLICATIONS/GUITOOLS/INIFileEditor.cpp @@ -12,7 +12,6 @@ #include #include #include -#include #include #if !defined(__APPLE__) diff --git a/src/openms_gui/source/VISUAL/APPLICATIONS/GUITOOLS/TOPPAS.cpp b/src/openms_gui/source/VISUAL/APPLICATIONS/GUITOOLS/TOPPAS.cpp index 4c39260be39..c3644ae3932 100644 --- a/src/openms_gui/source/VISUAL/APPLICATIONS/GUITOOLS/TOPPAS.cpp +++ b/src/openms_gui/source/VISUAL/APPLICATIONS/GUITOOLS/TOPPAS.cpp @@ -76,7 +76,7 @@ void print_usage(Logger::LogStream& stream = OpenMS_Log_info) << tool_name << " -- An assistant for GUI-driven TOPP workflow design." 
<< "\n" << "\n" << "Usage:" << "\n" - << " " << tool_name << " [options] [files]" << "\n" + << " " << tool_name << " [options] [.toppas files]" << "\n" << "\n" << "Options are:" << "\n" << " --help Shows this help" << "\n" diff --git a/src/openms_gui/source/VISUAL/APPLICATIONS/MISC/QApplicationTOPP.cpp b/src/openms_gui/source/VISUAL/APPLICATIONS/MISC/QApplicationTOPP.cpp index 61f2cf18e27..e344971df2b 100644 --- a/src/openms_gui/source/VISUAL/APPLICATIONS/MISC/QApplicationTOPP.cpp +++ b/src/openms_gui/source/VISUAL/APPLICATIONS/MISC/QApplicationTOPP.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -116,24 +117,23 @@ namespace OpenMS // text QString text = QString("
    " - "%1
    " - "
    " - "Version %2 %3" - "
    " - "OpenMS and TOPP is free software available under the
    " - "BSD 3-Clause License (BSD-new)
    " - "
    " - "
    " - "
    " - "
    " - "
    " - "Any published work based on TOPP and OpenMS shall cite these papers:
    " - "Roest, Sachsenberg, Aiche, Bielow, Weisser et al., Nat Methods (2016), 13(9):741-748
    " - "Kohlbacher et al., Bioinformatics (2007), 23:e191-e197
    ") + "%1
    " + "
    " + "Version %2 %3" + "
    " + "OpenMS and TOPP is free software available under the
    " + "BSD 3-Clause License (BSD-new)
    " + "
    " + "
    " + "
    " + "
    " + "
    " + "Any published work based on TOPP and OpenMS shall cite:
    %4") .arg(toolname) .arg(VersionInfo::getVersion().toQString()) .arg( // if we have a revision, embed it also into the shown version number - VersionInfo::getRevision().empty() ? "" : QString(" (") + VersionInfo::getRevision().toQString() + ")"); + VersionInfo::getRevision().empty() ? "" : QString(" (") + VersionInfo::getRevision().toQString() + ")") + .arg((TOPPBase::cite_openms.title + "
    " + TOPPBase::cite_openms.when_where + "
    doi:" + TOPPBase::cite_openms.doi).c_str()); label = new QLabel(text, dlg); diff --git a/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPASBase.cpp b/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPASBase.cpp index cb4745a647e..403ade3b212 100644 --- a/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPASBase.cpp +++ b/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPASBase.cpp @@ -72,6 +72,7 @@ #include #include #include +#include using namespace std; @@ -437,21 +438,19 @@ namespace OpenMS header_labels.append(QString("TOPP tools")); tools_tree_view->setHeaderLabels(header_labels); - QTreeWidgetItem* item = new QTreeWidgetItem((QTreeWidget*)nullptr); - item->setText(0, ""); - tools_tree_view->addTopLevelItem(item); - item = new QTreeWidgetItem((QTreeWidget*)nullptr); - item->setText(0, ""); - tools_tree_view->addTopLevelItem(item); - item = new QTreeWidgetItem((QTreeWidget*)nullptr); - item->setText(0, ""); - tools_tree_view->addTopLevelItem(item); - item = new QTreeWidgetItem((QTreeWidget*)nullptr); - item->setText(0, ""); - tools_tree_view->addTopLevelItem(item); - item = new QTreeWidgetItem((QTreeWidget*)nullptr); - item->setText(0, ""); - tools_tree_view->addTopLevelItem(item); + auto add_list_item = [&tools_tree_view](const QString& node_name, const QString& tool_tip) + { + QTreeWidgetItem* item = new QTreeWidgetItem(tools_tree_view); + item->setText(0, node_name); + item->setToolTip(0, tool_tip); + tools_tree_view->addTopLevelItem(item); + }; + add_list_item("", "One or multiple input files, such as mzML or FASTA files from your local hard drive"); + add_list_item("", "Sink for one or more output files, which are produced by a TOPP tool and which you want to keep for later."); + add_list_item("", "Some TOPP tools write their output to a folder. 
Usually a fixed set of files, whose names cannot be set explicitly."); + add_list_item("", "Concatenate files from multiple input edges to a list and forward that list."); + add_list_item("", "Collect each single file from \na single input edge (for every time it runs)\nand then foward this list to the next tool (which is only invoked once)"); + add_list_item("", "Opposite of a collector."); //Param category_param = param_.copy("tool_categories:", true); @@ -475,24 +474,24 @@ namespace OpenMS std::map category_map; - foreach(const QString &category, category_list) + for (const QString &category : category_list) { - item = new QTreeWidgetItem((QTreeWidget*)nullptr); + auto item = new QTreeWidgetItem((QTreeWidget*)nullptr); item->setText(0, category); tools_tree_view->addTopLevelItem(item); category_map[category] = item; } - for (ToolListType::iterator it = tools_list.begin(); it != tools_list.end(); ++it) + for (const auto& tool : tools_list) { - item = new QTreeWidgetItem(category_map[it->second.category.toQString()]); - item->setText(0, it->first.toQString()); + auto item = new QTreeWidgetItem(category_map[tool.second.category.toQString()]); + item->setText(0, tool.first.toQString()); QTreeWidgetItem* parent_item = item; - StringList types = ToolHandler::getTypes(it->first); - for (StringList::iterator types_it = types.begin(); types_it != types.end(); ++types_it) + StringList types = ToolHandler::getTypes(tool.first); + for (const auto& type : types) { item = new QTreeWidgetItem(parent_item); - item->setText(0, types_it->toQString()); + item->setText(0, type.toQString()); } } tools_tree_view->resizeColumnToContents(0); @@ -1206,8 +1205,15 @@ namespace OpenMS { tv = new TOPPASOutputFileListVertex(); TOPPASOutputFileListVertex* oflv = dynamic_cast(tv); - connect(oflv, SIGNAL(outputFileWritten(const String &)), this, SLOT(outputVertexFinished(const String &))); - scene->connectOutputVertexSignals(oflv); + connect(tv, SIGNAL(outputFileWritten(const String &)), this, 
SLOT(outputVertexFinished(const String &))); + scene->connectOutputVertexSignals((TOPPASOutputVertex*)oflv); + } + else if (tool_name == "") + { + tv = new TOPPASOutputFolderVertex(); + TOPPASOutputFolderVertex* oflv = dynamic_cast(tv); + connect(tv, SIGNAL(outputFileWritten(const String&)), this, SLOT(outputVertexFinished(const String&))); + scene->connectOutputVertexSignals((TOPPASOutputVertex*)oflv); } else if (tool_name == "") { diff --git a/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPViewBase.cpp b/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPViewBase.cpp index 152e1fff035..26953944356 100644 --- a/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPViewBase.cpp +++ b/src/openms_gui/source/VISUAL/APPLICATIONS/TOPPViewBase.cpp @@ -728,7 +728,7 @@ namespace OpenMS glock.unlock(); if (!annotate_path.empty()) - { + { // this opens a new window with raw data + annotation; we want the actual idXML data on top auto load_res = addDataFile(annotate_path, false, false); if (load_res == LOAD_RESULT::OK) { @@ -745,6 +745,7 @@ namespace OpenMS log_->appendNewHeader(LogWindow::LogState::NOTICE, "Error", "Annotation failed."); } } + window_id = getActivePlotWidget()->getWindowId(); // add ids on top of raw data } } @@ -895,31 +896,30 @@ namespace OpenMS { if (data_type == LayerDataBase::DT_FEATURE) // features { - if (!target_window->canvas()->addLayer(feature_map, filename)) + if (!target_window->canvas()->addLayer(feature_map, filename, caption)) { return; } } else if (data_type == LayerDataBase::DT_CONSENSUS) // consensus features { - if (!target_window->canvas()->addLayer(consensus_map, filename)) + if (! target_window->canvas()->addLayer(consensus_map, filename, caption)) return; } else if (data_type == LayerDataBase::DT_IDENT) { - if (!target_window->canvas()->addLayer(peptides, filename)) + if (! 
target_window->canvas()->addLayer(peptides, filename, caption)) return; } else // peaks or chrom { - if (data_type == LayerDataBase::DT_PEAK && - !target_window->canvas()->addPeakLayer(peak_map, on_disc_peak_map, filename, use_intensity_cutoff)) + if (data_type == LayerDataBase::DT_PEAK && ! target_window->canvas()->addPeakLayer(peak_map, on_disc_peak_map, filename, caption, use_intensity_cutoff)) { return; } if (data_type == LayerDataBase::DT_CHROMATOGRAM && - !target_window->canvas()->addChromLayer(peak_map, on_disc_peak_map, filename)) + !target_window->canvas()->addChromLayer(peak_map, on_disc_peak_map, filename, caption)) { return; } @@ -946,6 +946,8 @@ namespace OpenMS { canvas->mergeIntoLayer(merge_layer, peptides); } + // combine layer names + canvas->setLayerName(merge_layer, canvas->getLayerName(merge_layer) + " + " + caption); } if (as_new_window) diff --git a/src/openms_gui/source/VISUAL/DIALOGS/TOPPASIOMappingDialog.cpp b/src/openms_gui/source/VISUAL/DIALOGS/TOPPASIOMappingDialog.cpp index fd5e267b9f6..4f0b962c635 100644 --- a/src/openms_gui/source/VISUAL/DIALOGS/TOPPASIOMappingDialog.cpp +++ b/src/openms_gui/source/VISUAL/DIALOGS/TOPPASIOMappingDialog.cpp @@ -10,19 +10,20 @@ #include #include +#include #include -#include #include +#include #include -#include +#include -#include #include #include #include #include +#include namespace OpenMS { @@ -53,6 +54,12 @@ namespace OpenMS { ui_->target_combo->setCurrentIndex(1); } + // if the target is an output folder and there is no output parameter in the input tool, the egde is invalid + TOPPASOutputFolderVertex* target_dir = qobject_cast(edge_->getTargetVertex()); + if (target_dir && ui_->source_combo->count() == 0) + { + return QDialog::Rejected; + } // is there only 1 possible mapping? 
-> do not show dialog if ((ui_->source_combo->count() == 2 || ui_->source_combo->count() == 0) && @@ -69,8 +76,6 @@ namespace OpenMS void TOPPASIOMappingDialog::fillComboBoxes_() { - target_input_param_indices_.clear(); - TOPPASVertex* source = edge_->getSourceVertex(); TOPPASVertex* target = edge_->getTargetVertex(); @@ -82,12 +87,33 @@ namespace OpenMS TOPPASSplitterVertex* target_splitter = qobject_cast(target); TOPPASInputFileListVertex* source_list = qobject_cast(source); TOPPASOutputFileListVertex* target_list = qobject_cast(target); + TOPPASOutputFolderVertex* target_dir = qobject_cast(target); + // an output folder can only be connected to a tool + if (target_dir) + { + if (!source_tool) + { // bad news: no source tool, hence no connection possible + return; + } + const auto source_output_dirs = source_tool->getOutputParameters(); + ui_->source_combo->addItem(""); - foreach(TOPPASToolVertex::IOInfo info, source_output_files) + for (TOPPASToolVertex::IOInfo info : source_output_files) { + if (info.type == TOPPASToolVertex::IOInfo::IOT_DIR) continue; String item_name; if (info.type == TOPPASToolVertex::IOInfo::IOT_FILE) { @@ -115,11 +142,7 @@ namespace OpenMS ss << info.valid_types; item_name += ss.str(); - ui_->source_combo->addItem(item_name.toQString()); - } - if (ui_->source_combo->count() == 2) // only 1 parameter - { - ui_->source_combo->setCurrentIndex(1); + ui_->source_combo->addItem(item_name.toQString(), source_output_files.indexOf(info)); } } else if (source_list || source_merger || source_splitter) @@ -143,8 +166,7 @@ namespace OpenMS if (target_tool) { - QVector target_input_files; - target_tool->getInputParameters(target_input_files); + QVector target_input_files = target_tool->getInputParameters(); ui_->target_label->setText(target_tool->getName().toQString()); if (!target_tool->getType().empty()) { @@ -155,10 +177,8 @@ namespace OpenMS ui_->target_type_label->setVisible(false); } ui_->target_combo->addItem(") + { + return 1; // pre-select the 
only parameter + } + return 0; // use '