From df31ffb18cf6d1d31a1183f0d0d1a2b576fdcde1 Mon Sep 17 00:00:00 2001 From: chen Date: Mon, 18 May 2020 13:18:32 +0200 Subject: [PATCH 1/3] prepareLine --- .../ANALYSIS/OPENSWATH/OpenSwathTSVWriter.cpp | 156 +++++++++--------- 1 file changed, 78 insertions(+), 78 deletions(-) diff --git a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathTSVWriter.cpp b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathTSVWriter.cpp index 2d7e9b3027e..9dab6c98db2 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathTSVWriter.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/OpenSwathTSVWriter.cpp @@ -33,6 +33,7 @@ // -------------------------------------------------------------------------- #include +#include namespace OpenMS { @@ -108,7 +109,7 @@ namespace OpenMS const OpenSwath::LightTransition * transition, const FeatureMap& output, const String id) const { - String result = ""; + std::stringstream result; String decoy = "0"; // 0 = false if (transition->decoy) { @@ -185,100 +186,99 @@ namespace OpenMS main_var = (String)feature_it->getMetaValue("main_var_xx_lda_prelim_score"); } - String line = - id + "_run0" - + "\t" + group_label - + "\t" + "0" - + "\t" + input_filename_ - + "\t" + (String)feature_it->getRT() - + "\t" + "f_" + feature_it->getUniqueId() // TODO might not be unique!!! - + "\t" + (String)pep.sequence - + "\t" + (feature_it->metaValueExists("missedCleavages") ? (String)feature_it->getMetaValue("missedCleavages") : "") - + "\t" + full_peptide_name - + "\t" + (String)pep.charge - + "\t" + (String)transition->precursor_mz - + "\t" + (String)feature_it->getIntensity() - + "\t" + protein_name - + "\t" + gene_name - + "\t" + decoy + + result << id << "_run0" + << "\t" << group_label + << "\t" << "0" + << "\t" << input_filename_ + << "\t" << (String)feature_it->getRT() + << "\t" << "f_" << feature_it->getUniqueId() // TODO might not be unique!!! + << "\t" << (String)pep.sequence + << "\t" << (feature_it->metaValueExists("missedCleavages") ? (String)feature_it->getMetaValue("missedCleavages") : "") + << "\t" << full_peptide_name + << "\t" << (String)pep.charge + << "\t" << (String)transition->precursor_mz + << "\t" << (String)feature_it->getIntensity() + << "\t" << protein_name + << "\t" << gene_name + << "\t" << decoy // Note: missing MetaValues will just produce a DataValue::EMPTY which lead to an empty column - + "\t" + (String)feature_it->getMetaValue("assay_rt") - + "\t" + (String)feature_it->getMetaValue("delta_rt") - + "\t" + (String)feature_it->getMetaValue("leftWidth") - + "\t" + main_var - + "\t" + (String)feature_it->getMetaValue("norm_RT") - + "\t" + (String)feature_it->getMetaValue("nr_peaks") - + "\t" + (String)feature_it->getMetaValue("peak_apices_sum") - + "\t" + (String)feature_it->getMetaValue("potentialOutlier") - + "\t" + (String)feature_it->getMetaValue("initialPeakQuality") - + "\t" + (String)feature_it->getMetaValue("rightWidth") - + "\t" + (String)feature_it->getMetaValue("rt_score") - + "\t" + (String)feature_it->getMetaValue("sn_ratio") - + "\t" + (String)feature_it->getMetaValue("total_xic") - + "\t" + (String)feature_it->getMetaValue("var_bseries_score") - + "\t" + (String)feature_it->getMetaValue("var_dotprod_score") - + "\t" + (String)feature_it->getMetaValue("var_intensity_score") - + "\t" + (String)feature_it->getMetaValue("var_isotope_correlation_score") - + "\t" + (String)feature_it->getMetaValue("var_isotope_overlap_score") - + "\t" + (String)feature_it->getMetaValue("var_library_corr") - + "\t" + (String)feature_it->getMetaValue("var_library_dotprod") - + "\t" + (String)feature_it->getMetaValue("var_library_manhattan") - + "\t" + (String)feature_it->getMetaValue("var_library_rmsd") - + "\t" + (String)feature_it->getMetaValue("var_library_rootmeansquare") - + "\t" + (String)feature_it->getMetaValue("var_library_sangle") - + "\t" + (String)feature_it->getMetaValue("var_log_sn_score") - + "\t" + (String)feature_it->getMetaValue("var_manhatt_score") - + "\t" + (String)feature_it->getMetaValue("var_massdev_score") - + "\t" + (String)feature_it->getMetaValue("var_massdev_score_weighted") - + "\t" + (String)feature_it->getMetaValue("var_norm_rt_score") - + "\t" + (String)feature_it->getMetaValue("var_xcorr_coelution") - + "\t" + (String)feature_it->getMetaValue("var_xcorr_coelution_weighted") - + "\t" + (String)feature_it->getMetaValue("var_xcorr_shape") - + "\t" + (String)feature_it->getMetaValue("var_xcorr_shape_weighted") + << "\t" << (String)feature_it->getMetaValue("assay_rt") + << "\t" << (String)feature_it->getMetaValue("delta_rt") + << "\t" << (String)feature_it->getMetaValue("leftWidth") + << "\t" << main_var + << "\t" << (String)feature_it->getMetaValue("norm_RT") + << "\t" << (String)feature_it->getMetaValue("nr_peaks") + << "\t" << (String)feature_it->getMetaValue("peak_apices_sum") + << "\t" << (String)feature_it->getMetaValue("potentialOutlier") + << "\t" << (String)feature_it->getMetaValue("initialPeakQuality") + << "\t" << (String)feature_it->getMetaValue("rightWidth") + << "\t" << (String)feature_it->getMetaValue("rt_score") + << "\t" << (String)feature_it->getMetaValue("sn_ratio") + << "\t" << (String)feature_it->getMetaValue("total_xic") + << "\t" << (String)feature_it->getMetaValue("var_bseries_score") + << "\t" << (String)feature_it->getMetaValue("var_dotprod_score") + << "\t" << (String)feature_it->getMetaValue("var_intensity_score") + << "\t" << (String)feature_it->getMetaValue("var_isotope_correlation_score") + << "\t" << (String)feature_it->getMetaValue("var_isotope_overlap_score") + << "\t" << (String)feature_it->getMetaValue("var_library_corr") + << "\t" << (String)feature_it->getMetaValue("var_library_dotprod") + << "\t" << (String)feature_it->getMetaValue("var_library_manhattan") + << "\t" << (String)feature_it->getMetaValue("var_library_rmsd") + << "\t" << (String)feature_it->getMetaValue("var_library_rootmeansquare") + << "\t" << (String)feature_it->getMetaValue("var_library_sangle") + << "\t" << (String)feature_it->getMetaValue("var_log_sn_score") + << "\t" << (String)feature_it->getMetaValue("var_manhatt_score") + << "\t" << (String)feature_it->getMetaValue("var_massdev_score") + << "\t" << (String)feature_it->getMetaValue("var_massdev_score_weighted") + << "\t" << (String)feature_it->getMetaValue("var_norm_rt_score") + << "\t" << (String)feature_it->getMetaValue("var_xcorr_coelution") + << "\t" << (String)feature_it->getMetaValue("var_xcorr_coelution_weighted") + << "\t" << (String)feature_it->getMetaValue("var_xcorr_shape") + << "\t" << (String)feature_it->getMetaValue("var_xcorr_shape_weighted") - + "\t" + (String)feature_it->getMetaValue("var_im_xcorr_shape") - + "\t" + (String)feature_it->getMetaValue("var_im_xcorr_coelution") - + "\t" + (String)feature_it->getMetaValue("var_im_delta_score") - + "\t" + (String)feature_it->getMetaValue("var_im_ms1_delta_score") - + "\t" + (String)feature_it->getMetaValue("im_drift") - + "\t" + (String)feature_it->getMetaValue("im_drift_weighted") + << "\t" << (String)feature_it->getMetaValue("var_im_xcorr_shape") + << "\t" << (String)feature_it->getMetaValue("var_im_xcorr_coelution") + << "\t" << (String)feature_it->getMetaValue("var_im_delta_score") + << "\t" << (String)feature_it->getMetaValue("var_im_ms1_delta_score") + << "\t" << (String)feature_it->getMetaValue("im_drift") + << "\t" << (String)feature_it->getMetaValue("im_drift_weighted") - + "\t" + (String)feature_it->getMetaValue("var_yseries_score") - + "\t" + (String)feature_it->getMetaValue("var_elution_model_fit_score"); + << "\t" << (String)feature_it->getMetaValue("var_yseries_score") + << "\t" << (String)feature_it->getMetaValue("var_elution_model_fit_score"); if (use_ms1_traces_) { - line += "\t" + (String)feature_it->getMetaValue("var_ms1_ppm_diff") - + "\t" + (String)feature_it->getMetaValue("var_ms1_isotope_correlation") - + "\t" + (String)feature_it->getMetaValue("var_ms1_isotope_overlap") - + "\t" + (String)feature_it->getMetaValue("var_ms1_xcorr_coelution") - + "\t" + (String)feature_it->getMetaValue("var_ms1_xcorr_shape"); + result << "\t" << (String)feature_it->getMetaValue("var_ms1_ppm_diff") + << "\t" << (String)feature_it->getMetaValue("var_ms1_isotope_correlation") + << "\t" << (String)feature_it->getMetaValue("var_ms1_isotope_overlap") + << "\t" << (String)feature_it->getMetaValue("var_ms1_xcorr_coelution") + << "\t" << (String)feature_it->getMetaValue("var_ms1_xcorr_shape"); } - line += "\t" + (String)feature_it->getMetaValue("xx_lda_prelim_score") - + "\t" + (String)feature_it->getMetaValue("xx_swath_prelim_score"); + result << "\t" << (String)feature_it->getMetaValue("xx_lda_prelim_score") + << "\t" << (String)feature_it->getMetaValue("xx_swath_prelim_score"); if (sonar_) { - line += "\t" + (String)feature_it->getMetaValue("var_sonar_lag") - + "\t" + (String)feature_it->getMetaValue("var_sonar_shape") - + "\t" + (String)feature_it->getMetaValue("var_sonar_log_sn") - + "\t" + (String)feature_it->getMetaValue("var_sonar_log_diff") - + "\t" + (String)feature_it->getMetaValue("var_sonar_log_trend") - + "\t" + (String)feature_it->getMetaValue("var_sonar_rsq"); + result << "\t" << (String)feature_it->getMetaValue("var_sonar_lag") + << "\t" << (String)feature_it->getMetaValue("var_sonar_shape") + << "\t" << (String)feature_it->getMetaValue("var_sonar_log_sn") + << "\t" << (String)feature_it->getMetaValue("var_sonar_log_diff") + << "\t" << (String)feature_it->getMetaValue("var_sonar_log_trend") + << "\t" << (String)feature_it->getMetaValue("var_sonar_rsq"); } if (use_ms1_traces_) { - line += "\t" + ListUtils::concatenate(aggr_prec_Peak_Area, ";") + "\t" + ListUtils::concatenate(aggr_prec_Peak_Apex, ";") + "\t" + ListUtils::concatenate(aggr_prec_Fragment_Annotation, ";"); + result << "\t" << ListUtils::concatenate(aggr_prec_Peak_Area, ";") << "\t" << ListUtils::concatenate(aggr_prec_Peak_Apex, ";") << "\t" << ListUtils::concatenate(aggr_prec_Fragment_Annotation, ";"); } - line += "\t" + ListUtils::concatenate(aggr_Peak_Area, ";") + "\t" + ListUtils::concatenate(aggr_Peak_Apex, ";") + "\t" + ListUtils::concatenate(aggr_Fragment_Annotation, ";"); - line += "\t" + ListUtils::concatenate(rt_fwhm, ";"); - line += "\t" + (feature_it->metaValueExists("masserror_ppm") ? ListUtils::concatenate(feature_it->getMetaValue("masserror_ppm").toDoubleList(), ";") : ""); + result << "\t" << ListUtils::concatenate(aggr_Peak_Area, ";") << "\t" << ListUtils::concatenate(aggr_Peak_Apex, ";") << "\t" << ListUtils::concatenate(aggr_Fragment_Annotation, ";"); + result << "\t" << ListUtils::concatenate(rt_fwhm, ";"); + result << "\t" << (feature_it->metaValueExists("masserror_ppm") ? ListUtils::concatenate(feature_it->getMetaValue("masserror_ppm").toDoubleList(), ";") : ""); - line += "\n"; - result += line; + result << "\n"; } // end of iteration - return result; + return result.str(); } void OpenSwathTSVWriter::writeLines(const std::vector& to_output) From 22251be9adb51693fd28cdc01a51f35f2c78c0c5 Mon Sep 17 00:00:00 2001 From: chen Date: Wed, 20 May 2020 11:03:36 +0200 Subject: [PATCH 2/3] SignalToNoiseEstimator --- .../DATAACCESS/MRMFeatureAccessOpenMS.h | 4 +- .../NOISEESTIMATION/SignalToNoiseEstimator.h | 53 ++++--------------- .../SignalToNoiseEstimatorMeanIterative.h | 28 +++++----- .../SignalToNoiseEstimatorMedian.h | 27 +++++----- .../RAW2PEAK/PeakPickerIterative.h | 4 +- .../ANALYSIS/OPENSWATH/PeakPickerMRM.cpp | 4 +- .../OPENSWATH/TargetedSpectraExtractor.cpp | 6 +-- .../FeatureFinderAlgorithmMRM.cpp | 8 +-- .../RAW2PEAK/PeakPickerCWT.cpp | 6 +-- .../RAW2PEAK/PeakPickerHiRes.cpp | 14 ++--- ...gnalToNoiseEstimatorMeanIterative_test.cpp | 24 ++++----- .../SignalToNoiseEstimatorMedian_test.cpp | 6 +-- .../source/SignalToNoiseEstimator_test.cpp | 21 ++------ src/topp/EICExtractor.cpp | 2 +- src/topp/FileFilter.cpp | 10 ++-- 15 files changed, 85 insertions(+), 132 deletions(-) diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DATAACCESS/MRMFeatureAccessOpenMS.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DATAACCESS/MRMFeatureAccessOpenMS.h index 6fae33b44f9..15ecfa68f44 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DATAACCESS/MRMFeatureAccessOpenMS.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DATAACCESS/MRMFeatureAccessOpenMS.h @@ -206,12 +206,12 @@ namespace OpenMS if (std::fabs(prev->getMZ() - RT) < std::fabs(iter->getMZ() - RT) ) { // prev is closer to the apex - return sn_.getSignalToNoise(*prev); + return sn_.getSignalToNoise((Size) distance(chromatogram_.begin(),prev)); } else { // iter is closer to the apex - return sn_.getSignalToNoise(*iter); + return sn_.getSignalToNoise((Size) distance(chromatogram_.begin(),iter)); } } diff --git a/src/openms/include/OpenMS/FILTERING/NOISEESTIMATION/SignalToNoiseEstimator.h b/src/openms/include/OpenMS/FILTERING/NOISEESTIMATION/SignalToNoiseEstimator.h index 551e68a11d8..c6bfcee5174 100644 --- a/src/openms/include/OpenMS/FILTERING/NOISEESTIMATION/SignalToNoiseEstimator.h +++ b/src/openms/include/OpenMS/FILTERING/NOISEESTIMATION/SignalToNoiseEstimator.h @@ -71,8 +71,7 @@ namespace OpenMS inline SignalToNoiseEstimator() : DefaultParamHandler("SignalToNoiseEstimator"), ProgressLogger(), - first_(), - last_(), + c(), is_result_valid_(false) { } @@ -82,8 +81,7 @@ namespace OpenMS DefaultParamHandler(source), ProgressLogger(source), stn_estimates_(source.stn_estimates_), - first_(source.first_), - last_(source.last_), + c(source.c), is_result_valid_(source.is_result_valid_) {} @@ -95,8 +93,7 @@ namespace OpenMS DefaultParamHandler::operator=(source); ProgressLogger::operator=(source); stn_estimates_ = source.stn_estimates_; - first_ = source.first_; - last_ = source.last_; + c = source.c; return *this; } @@ -104,47 +101,22 @@ namespace OpenMS ~SignalToNoiseEstimator() override {} - /// Set the start and endpoint of the raw data interval, for which signal to noise ratios will be estimated immediately - virtual void init(const PeakIterator & it_begin, const PeakIterator & it_end) + virtual void init(const Container& c) { - first_ = it_begin; - last_ = it_end; - computeSTN_(first_, last_); + computeSTN_(c); is_result_valid_ = true; } - /// Set the start and endpoint of the raw data interval, for which signal to noise ratios will be estimated immediately - virtual void init(const Container & c) - { - init(c.begin(), c.end()); - } - - /// Return to signal/noise estimate for data point @p data_point - /// @note the first query to this function will take longer, as - /// all SignalToNoise values are calculated - /// @note you will get a warning to stderr if more than 20% of the - /// noise estimates used sparse windows - virtual double getSignalToNoise(const PeakIterator & data_point) - { - if (!is_result_valid_) - { - // recompute ... - init(first_, last_); - } - - return stn_estimates_[*data_point]; - } - - virtual double getSignalToNoise(const PeakType & data_point) + virtual double getSignalToNoise(const Size index) { if (!is_result_valid_) { // recompute ... - init(first_, last_); + init(c); } - return stn_estimates_[data_point]; + return stn_estimates_[index]; } protected: @@ -154,7 +126,7 @@ namespace OpenMS * * @exception Throws Exception::InvalidValue */ - virtual void computeSTN_(const PeakIterator & scan_first_, const PeakIterator & scan_last_) = 0; + virtual void computeSTN_(const Container& c) = 0; @@ -204,12 +176,9 @@ namespace OpenMS //MEMBERS: /// stores the noise estimate for each peak - std::map stn_estimates_; + std::vector stn_estimates_; - /// points to the first raw data point in the interval - PeakIterator first_; - /// points to the right position next to the last raw data point in the interval - PeakIterator last_; + Container c; /// flag: set to true if SignalToNoise estimates are calculated and none of the params were changed. otherwise false. mutable bool is_result_valid_; }; diff --git a/src/openms/include/OpenMS/FILTERING/NOISEESTIMATION/SignalToNoiseEstimatorMeanIterative.h b/src/openms/include/OpenMS/FILTERING/NOISEESTIMATION/SignalToNoiseEstimatorMeanIterative.h index e69c8812188..d8c709705c3 100644 --- a/src/openms/include/OpenMS/FILTERING/NOISEESTIMATION/SignalToNoiseEstimatorMeanIterative.h +++ b/src/openms/include/OpenMS/FILTERING/NOISEESTIMATION/SignalToNoiseEstimatorMeanIterative.h @@ -39,6 +39,7 @@ #include #include #include +#include //for std::max_element namespace OpenMS { @@ -76,8 +77,6 @@ namespace OpenMS enum IntensityThresholdCalculation {MANUAL = -1, AUTOMAXBYSTDEV = 0, AUTOMAXBYPERCENT = 1}; using SignalToNoiseEstimator::stn_estimates_; - using SignalToNoiseEstimator::first_; - using SignalToNoiseEstimator::last_; using SignalToNoiseEstimator::is_result_valid_; using SignalToNoiseEstimator::defaults_; using SignalToNoiseEstimator::param_; @@ -168,13 +167,17 @@ namespace OpenMS @param scan_last_ last element in the scan (disregarded) @exception Throws Exception::InvalidValue */ - void computeSTN_(const PeakIterator & scan_first_, const PeakIterator & scan_last_) override + void computeSTN_(const Container& c) override { + PeakIterator scan_first_ = c.begin(); + PeakIterator scan_last_ = c.end(); + // reset counter for sparse windows double sparse_window_percent = 0; // reset the results stn_estimates_.clear(); + stn_estimates_.resize(c.size()); // maximal range of histogram needs to be calculated first if (auto_mode_ == AUTOMAXBYSTDEV) @@ -209,11 +212,13 @@ namespace OpenMS ++size; ++run; } +// auto maxIt = std::max_element(c.begin(), c.end() ,[](const PeakType& a, const PeakType& b){ return a.getIntensity() > b.getIntensity();}); +// typename PeakType::IntensityType maxInt = maxIt->getIntensity(); double bin_size = maxInt / 100; // fill histogram - run = scan_first_; + run = scan_first_; while (run != scan_last_) { ++histogram_auto[(int) (((*run).getIntensity() - 1) / bin_size)]; @@ -221,7 +226,7 @@ namespace OpenMS } // add up element counts in histogram until ?th percentile is reached - int elements_below_percentile = (int) (auto_max_percentile_ * size / 100); + int elements_below_percentile = (int) (auto_max_percentile_ * c.size() / 100); int elements_seen = 0; int i = -1; run = scan_first_; @@ -283,15 +288,8 @@ namespace OpenMS double noise; // noise value of a datapoint - // determine how many elements we need to estimate (for progress estimation) - int windows_overall = 0; - PeakIterator run = scan_first_; - while (run != scan_last_) - { - ++windows_overall; - ++run; - } - SignalToNoiseEstimator::startProgress(0, windows_overall, "noise estimation of data"); + + SignalToNoiseEstimator::startProgress(0, c.size(), "noise estimation of data"); // MAIN LOOP while (window_pos_center != scan_last_) @@ -370,7 +368,7 @@ namespace OpenMS } // store result - stn_estimates_[*window_pos_center] = (*window_pos_center).getIntensity() / noise; + stn_estimates_[window_count] = (*window_pos_center).getIntensity() / noise; diff --git a/src/openms/include/OpenMS/FILTERING/NOISEESTIMATION/SignalToNoiseEstimatorMedian.h b/src/openms/include/OpenMS/FILTERING/NOISEESTIMATION/SignalToNoiseEstimatorMedian.h index aca1d784285..ee69b8848a7 100644 --- a/src/openms/include/OpenMS/FILTERING/NOISEESTIMATION/SignalToNoiseEstimatorMedian.h +++ b/src/openms/include/OpenMS/FILTERING/NOISEESTIMATION/SignalToNoiseEstimatorMedian.h @@ -41,6 +41,7 @@ #include #include #include +#include //for std::max_element namespace OpenMS { @@ -87,8 +88,6 @@ namespace OpenMS enum IntensityThresholdCalculation {MANUAL = -1, AUTOMAXBYSTDEV = 0, AUTOMAXBYPERCENT = 1}; using SignalToNoiseEstimator::stn_estimates_; - using SignalToNoiseEstimator::first_; - using SignalToNoiseEstimator::last_; using SignalToNoiseEstimator::is_result_valid_; using SignalToNoiseEstimator::defaults_; using SignalToNoiseEstimator::param_; @@ -188,8 +187,10 @@ namespace OpenMS @param scan_last_ last element in the scan (disregarded) @exception Throws Exception::InvalidValue */ - void computeSTN_(const PeakIterator & scan_first_, const PeakIterator & scan_last_) override + void computeSTN_(const Container& c) override { + PeakIterator scan_first_ = c.begin(); + PeakIterator scan_last_ = c.end(); // reset counter for sparse windows sparse_window_percent_ = 0; // reset counter for histogram overflow @@ -197,6 +198,7 @@ namespace OpenMS // reset the results stn_estimates_.clear(); + stn_estimates_.resize(c.size()); // maximal range of histogram needs to be calculated first if (auto_mode_ == AUTOMAXBYSTDEV) @@ -231,11 +233,13 @@ namespace OpenMS ++size; ++run; } +// auto maxIt = std::max_element(c.begin(), c.end() ,[](const PeakType& a, const PeakType& b){ return a.getIntensity() > b.getIntensity();}); +// typename PeakType::IntensityType maxInt = maxIt->getIntensity(); double bin_size = maxInt / 100; // fill histogram - run = scan_first_; + run = scan_first_; while (run != scan_last_) { ++histogram_auto[(int) (((*run).getIntensity() - 1) / bin_size)]; @@ -243,7 +247,7 @@ namespace OpenMS } // add up element counts in histogram until ?th percentile is reached - int elements_below_percentile = (int) (auto_max_percentile_ * size / 100); + int elements_below_percentile = (int) (auto_max_percentile_ * c.size() / 100); int elements_seen = 0; int i = -1; run = scan_first_; @@ -310,15 +314,8 @@ namespace OpenMS double noise; // noise value of a datapoint - // determine how many elements we need to estimate (for progress estimation) - int windows_overall = 0; - PeakIterator run = scan_first_; - while (run != scan_last_) - { - ++windows_overall; - ++run; - } - SignalToNoiseEstimator::startProgress(0, windows_overall, "noise estimation of data"); + + SignalToNoiseEstimator::startProgress(0, c.size(), "noise estimation of data"); // MAIN LOOP while (window_pos_center != scan_last_) @@ -369,7 +366,7 @@ namespace OpenMS } // store result - stn_estimates_[*window_pos_center] = (*window_pos_center).getIntensity() / noise; + stn_estimates_[window_count] = (*window_pos_center).getIntensity() / noise; // advance the window center by one datapoint diff --git a/src/openms/include/OpenMS/TRANSFORMATIONS/RAW2PEAK/PeakPickerIterative.h b/src/openms/include/OpenMS/TRANSFORMATIONS/RAW2PEAK/PeakPickerIterative.h index d1793c8702a..e099e701d7e 100644 --- a/src/openms/include/OpenMS/TRANSFORMATIONS/RAW2PEAK/PeakPickerIterative.h +++ b/src/openms/include/OpenMS/TRANSFORMATIONS/RAW2PEAK/PeakPickerIterative.h @@ -209,7 +209,7 @@ namespace OpenMS { if (signal_to_noise_ > 0.0) { - if (snt.getSignalToNoise(input[i - k]) < signal_to_noise_) + if (snt.getSignalToNoise(i - k) < signal_to_noise_) { break; } @@ -229,7 +229,7 @@ namespace OpenMS { if (signal_to_noise_ > 0.0) { - if (snt.getSignalToNoise(input[i + k]) < signal_to_noise_) + if (snt.getSignalToNoise(i + k) < signal_to_noise_) { break; } diff --git a/src/openms/source/ANALYSIS/OPENSWATH/PeakPickerMRM.cpp b/src/openms/source/ANALYSIS/OPENSWATH/PeakPickerMRM.cpp index 9572ab989a1..7b8c3f195dd 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/PeakPickerMRM.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/PeakPickerMRM.cpp @@ -198,7 +198,7 @@ namespace OpenMS //&& std::fabs(chromatogram[min_i-k].getMZ() - peak_raw_data.begin()->first) < spacing_difference*min_spacing && (chromatogram[min_i - k].getIntensity() < chromatogram[min_i - k + 1].getIntensity() || (peak_width_ > 0.0 && std::fabs(chromatogram[min_i - k].getRT() - central_peak_rt) < peak_width_)) - && (signal_to_noise_ <= 0.0 || snt_.getSignalToNoise(chromatogram[min_i - k]) >= signal_to_noise_)) + && (signal_to_noise_ <= 0.0 || snt_.getSignalToNoise(min_i - k) >= signal_to_noise_)) { ++k; } @@ -210,7 +210,7 @@ namespace OpenMS //&& std::fabs(chromatogram[min_i+k].getMZ() - peak_raw_data.rbegin()->first) < spacing_difference*min_spacing && (chromatogram[min_i + k].getIntensity() < chromatogram[min_i + k - 1].getIntensity() || (peak_width_ > 0.0 && std::fabs(chromatogram[min_i + k].getRT() - central_peak_rt) < peak_width_)) - && (signal_to_noise_ <= 0.0 || snt_.getSignalToNoise(chromatogram[min_i + k]) >= signal_to_noise_) ) + && (signal_to_noise_ <= 0.0 || snt_.getSignalToNoise(min_i + k) >= signal_to_noise_) ) { ++k; } diff --git a/src/openms/source/ANALYSIS/OPENSWATH/TargetedSpectraExtractor.cpp b/src/openms/source/ANALYSIS/OPENSWATH/TargetedSpectraExtractor.cpp index 6932aa2267c..c22e1022d01 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/TargetedSpectraExtractor.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/TargetedSpectraExtractor.cpp @@ -325,10 +325,10 @@ namespace OpenMS p.setValue("noise_for_empty_window", 2.0); p.setValue("min_required_elements", 10); sne.setParameters(p); - sne.init(annotated_spectra[i].begin(), annotated_spectra[i].end()); + sne.init(annotated_spectra[i]); double avgSNR { 0 }; - for (MSSpectrum::const_iterator it = annotated_spectra[i].begin(); - it != annotated_spectra[i].end(); + for (Size it = 0; + it < annotated_spectra[i].size(); ++it) { avgSNR += sne.getSignalToNoise(it); diff --git a/src/openms/source/TRANSFORMATIONS/FEATUREFINDER/FeatureFinderAlgorithmMRM.cpp b/src/openms/source/TRANSFORMATIONS/FEATUREFINDER/FeatureFinderAlgorithmMRM.cpp index 7aaa4a8ba9c..9df99fdd5ad 100644 --- a/src/openms/source/TRANSFORMATIONS/FEATUREFINDER/FeatureFinderAlgorithmMRM.cpp +++ b/src/openms/source/TRANSFORMATIONS/FEATUREFINDER/FeatureFinderAlgorithmMRM.cpp @@ -168,7 +168,7 @@ namespace OpenMS continue; } sne.setParameters(sne_param); - sne.init(chromatogram.begin(), chromatogram.end()); + sne.init(chromatogram); if (write_debuginfo) { @@ -176,17 +176,17 @@ namespace OpenMS } PeakSpectrum::FloatDataArray signal_to_noise; - for (PeakSpectrum::Iterator sit = chromatogram.begin(); sit != chromatogram.end(); ++sit) + for (Size sit = 0; sit < chromatogram.size(); ++sit) { double sn(sne.getSignalToNoise(sit)); signal_to_noise.push_back(sn); if (write_debuginfo) { - std::cerr << sit->getMZ() << " " << sit->getIntensity() << " " << sn << std::endl; + std::cerr << chromatogram[sit].getMZ() << " " << chromatogram[sit].getIntensity() << " " << sn << std::endl; } if (min_signal_to_noise_ratio == 0 || sn > min_signal_to_noise_ratio) { - sn_chrom.push_back(*sit); + sn_chrom.push_back(chromatogram[sit]); } } chromatogram.getFloatDataArrays().push_back(signal_to_noise); diff --git a/src/openms/source/TRANSFORMATIONS/RAW2PEAK/PeakPickerCWT.cpp b/src/openms/source/TRANSFORMATIONS/RAW2PEAK/PeakPickerCWT.cpp index 05b5edc751a..e59f67c20c2 100644 --- a/src/openms/source/TRANSFORMATIONS/RAW2PEAK/PeakPickerCWT.cpp +++ b/src/openms/source/TRANSFORMATIONS/RAW2PEAK/PeakPickerCWT.cpp @@ -1142,7 +1142,7 @@ namespace OpenMS PeakIterator it_pick_begin = raw_peak_array.begin(); PeakIterator it_pick_end = raw_peak_array.end(); - sne.init(it_pick_begin, it_pick_end); + sne.init(raw_peak_array); // Upper peak width bound double fwhm_upper_bound = (double)param_.getValue("fwhm_upper_bound_factor") * scale_; @@ -1179,7 +1179,7 @@ namespace OpenMS { // if the signal to noise ratio at the max position is too small // the peak isn't considered - if ((area.max != it_pick_end) && (sne.getSignalToNoise(area.max) < signal_to_noise_)) + if ((area.max != it_pick_end) && (sne.getSignalToNoise((Size) distance(raw_peak_array.begin(),area.max)) < signal_to_noise_)) { it_pick_begin = area.max; distance_from_scan_border = distance(raw_peak_array.begin(), it_pick_begin); @@ -1222,7 +1222,7 @@ namespace OpenMS && (shape.getFWHM() >= fwhm_bound_) && (shape.getFWHM() <= fwhm_upper_bound)) { - shape.signal_to_noise = sne.getSignalToNoise(area.max); + shape.signal_to_noise = sne.getSignalToNoise((Size) distance(raw_peak_array.begin(),area.max)); peak_shapes.push_back(shape); ++number_of_peaks; } diff --git a/src/openms/source/TRANSFORMATIONS/RAW2PEAK/PeakPickerHiRes.cpp b/src/openms/source/TRANSFORMATIONS/RAW2PEAK/PeakPickerHiRes.cpp index 0296ce01a4f..4176984ce0f 100644 --- a/src/openms/source/TRANSFORMATIONS/RAW2PEAK/PeakPickerHiRes.cpp +++ b/src/openms/source/TRANSFORMATIONS/RAW2PEAK/PeakPickerHiRes.cpp @@ -170,9 +170,9 @@ namespace OpenMS double act_snt = 0.0, act_snt_l1 = 0.0, act_snt_r1 = 0.0; if (signal_to_noise_ > 0.0) { - act_snt = snt.getSignalToNoise(input[i]); - act_snt_l1 = snt.getSignalToNoise(input[i - 1]); - act_snt_r1 = snt.getSignalToNoise(input[i + 1]); + act_snt = snt.getSignalToNoise(i); + act_snt_l1 = snt.getSignalToNoise(i - 1); + act_snt_r1 = snt.getSignalToNoise(i + 1); } // look for peak cores meeting MZ and intensity/SNT criteria @@ -193,8 +193,8 @@ namespace OpenMS if (signal_to_noise_ > 0.0) { - act_snt_l2 = snt.getSignalToNoise(input[i - 2]); - act_snt_r2 = snt.getSignalToNoise(input[i + 2]); + act_snt_l2 = snt.getSignalToNoise(i - 2); + act_snt_r2 = snt.getSignalToNoise(i + 2); } // checking signal-to-noise? @@ -238,7 +238,7 @@ namespace OpenMS if (signal_to_noise_ > 0.0) { - act_snt_lk = snt.getSignalToNoise(input[i - k]); + act_snt_lk = snt.getSignalToNoise(i - k); } if ((act_snt_lk >= signal_to_noise_) && @@ -279,7 +279,7 @@ namespace OpenMS if (signal_to_noise_ > 0.0) { - act_snt_rk = snt.getSignalToNoise(input[i + k]); + act_snt_rk = snt.getSignalToNoise(i + k); } if ((act_snt_rk >= signal_to_noise_) && diff --git a/src/tests/class_tests/openms/source/SignalToNoiseEstimatorMeanIterative_test.cpp b/src/tests/class_tests/openms/source/SignalToNoiseEstimatorMeanIterative_test.cpp index c891d067202..935d8a61316 100644 --- a/src/tests/class_tests/openms/source/SignalToNoiseEstimatorMeanIterative_test.cpp +++ b/src/tests/class_tests/openms/source/SignalToNoiseEstimatorMeanIterative_test.cpp @@ -82,7 +82,7 @@ START_SECTION((virtual ~SignalToNoiseEstimatorMeanIterative())) END_SECTION -START_SECTION([EXTRA](virtual void init(const PeakIterator& it_begin, const PeakIterator& it_end))) +START_SECTION([EXTRA](virtual void init(const Container& c))) TOLERANCE_ABSOLUTE(0.5) @@ -90,38 +90,38 @@ START_SECTION([EXTRA](virtual void init(const PeakIterator& it_begin, const Peak MSSpectrum::const_iterator it; DTAFile dta_file; dta_file.load(OPENMS_GET_TEST_DATA_PATH("SignalToNoiseEstimator_test.dta"), raw_data); - - - SignalToNoiseEstimatorMeanIterative<> sne; + + + SignalToNoiseEstimatorMeanIterative<> sne; Param p; p.setValue("win_len", 40.1); p.setValue("noise_for_empty_window", 2.0); p.setValue("min_required_elements", 10); sne.setParameters(p); - sne.init(raw_data.begin(),raw_data.end()); + sne.init(raw_data); MSSpectrum stn_data; - + #ifdef DEBUG_TEST MSSpectrum stn_data__; #endif - + dta_file.load(OPENMS_GET_TEST_DATA_PATH("SignalToNoiseEstimatorMeanIterative_test.out"), stn_data); int i = 0; for (it=raw_data.begin();it!=raw_data.end(); ++it) { - TEST_REAL_SIMILAR (stn_data[i].getIntensity(), sne.getSignalToNoise(it)); -#ifdef DEBUG_TEST + TEST_REAL_SIMILAR (stn_data[i].getIntensity(), sne.getSignalToNoise(i)); +#ifdef DEBUG_TEST Peak1D peak = (*it); peak.setIntensity(stn_data[i].getIntensity() / sne.getSignalToNoise(it)); stn_data__.push_back(peak); -#endif +#endif ++i; } - + #ifdef DEBUG_TEST dta_file.store(OPENMS_GET_TEST_DATA_PATH("SignalToNoiseEstimatorMeanIterative_test.debug"), stn_data__); -#endif +#endif END_SECTION diff --git a/src/tests/class_tests/openms/source/SignalToNoiseEstimatorMedian_test.cpp b/src/tests/class_tests/openms/source/SignalToNoiseEstimatorMedian_test.cpp index b4686cee945..04bdaeadd3d 100644 --- a/src/tests/class_tests/openms/source/SignalToNoiseEstimatorMedian_test.cpp +++ b/src/tests/class_tests/openms/source/SignalToNoiseEstimatorMedian_test.cpp @@ -77,7 +77,7 @@ START_SECTION((virtual ~SignalToNoiseEstimatorMedian())) END_SECTION -START_SECTION([EXTRA](virtual void init(const PeakIterator& it_begin, const PeakIterator& it_end))) +START_SECTION([EXTRA](virtual void init(const Container& c))) MSSpectrum raw_data; MSSpectrum::const_iterator it; @@ -91,14 +91,14 @@ START_SECTION([EXTRA](virtual void init(const PeakIterator& it_begin, const Peak p.setValue("noise_for_empty_window", 2.0); p.setValue("min_required_elements", 10); sne.setParameters(p); - sne.init(raw_data.begin(),raw_data.end()); + sne.init(raw_data); MSSpectrum stn_data; dta_file.load(OPENMS_GET_TEST_DATA_PATH("SignalToNoiseEstimatorMedian_test.out"), stn_data); int i = 0; for (it=raw_data.begin();it!=raw_data.end(); ++it) { - TEST_REAL_SIMILAR (stn_data[i].getIntensity(), sne.getSignalToNoise(it)); + TEST_REAL_SIMILAR (stn_data[i].getIntensity(), sne.getSignalToNoise(i)); //Peak1D peak = (*it); diff --git a/src/tests/class_tests/openms/source/SignalToNoiseEstimator_test.cpp b/src/tests/class_tests/openms/source/SignalToNoiseEstimator_test.cpp index 132688bf15a..ae20abdf7dd 100644 --- a/src/tests/class_tests/openms/source/SignalToNoiseEstimator_test.cpp +++ b/src/tests/class_tests/openms/source/SignalToNoiseEstimator_test.cpp @@ -67,10 +67,10 @@ class TestSignalToNoiseEstimator protected: - void computeSTN_(const PeakIterator& scan_first_, const PeakIterator& scan_last_) + void computeSTN_(const MSSpectrum& C) throw() override { - if (scan_first_ == scan_last_) + if (C.begin() == C.end()) { std::cout << "bla"; } @@ -95,7 +95,7 @@ END_SECTION START_SECTION((SignalToNoiseEstimator(const SignalToNoiseEstimator &source))) TestSignalToNoiseEstimator sne; MSSpectrum spec; - sne.init(spec.begin(), spec.end()); + sne.init(spec); TestSignalToNoiseEstimator sne_copy(sne); NOT_TESTABLE END_SECTION @@ -104,7 +104,7 @@ END_SECTION START_SECTION((SignalToNoiseEstimator& operator=(const SignalToNoiseEstimator &source))) TestSignalToNoiseEstimator sne; MSSpectrum spec; - sne.init(spec.begin(), spec.end()); + sne.init(spec); TestSignalToNoiseEstimator sne_copy; sne_copy = sne; NOT_TESTABLE @@ -116,13 +116,6 @@ START_SECTION((virtual ~SignalToNoiseEstimator())) END_SECTION -START_SECTION((virtual void init(const PeakIterator& it_begin, const PeakIterator& it_end))) - TestSignalToNoiseEstimator sne; - MSSpectrum spec; - sne.init(spec.begin(), spec.end()); - NOT_TESTABLE -END_SECTION - START_SECTION((virtual void init(const Container& c))) TestSignalToNoiseEstimator sne; MSSpectrum spec; @@ -130,12 +123,8 @@ START_SECTION((virtual void init(const Container& c))) NOT_TESTABLE END_SECTION -START_SECTION((virtual double getSignalToNoise(const PeakIterator& data_point))) - // hard to do without implementing computeSTN_ properly - NOT_TESTABLE -END_SECTION -START_SECTION((virtual double getSignalToNoise(const PeakType &data_point))) +START_SECTION((virtual double getSignalToNoise(const Size index))) // hard to do without implementing computeSTN_ properly NOT_TESTABLE END_SECTION diff --git a/src/topp/EICExtractor.cpp b/src/topp/EICExtractor.cpp index 1bcd20d041f..1c988198cae 100644 --- a/src/topp/EICExtractor.cpp +++ b/src/topp/EICExtractor.cpp @@ -338,7 +338,7 @@ class TOPPEICExtractor : { Peak1D peak; peak.setMZ(tic[is].getMZ()); - peak.setIntensity(snt.getSignalToNoise(tics[is])); + peak.setIntensity(snt.getSignalToNoise(is)); tics_sn.push_back(peak); } out_debug.addChromatogram(toChromatogram(tics_sn)); diff --git a/src/topp/FileFilter.cpp b/src/topp/FileFilter.cpp index 3631dd991d9..a96f122a250 100644 --- a/src/topp/FileFilter.cpp +++ b/src/topp/FileFilter.cpp @@ -851,14 +851,14 @@ class TOPPFileFilter : SignalToNoiseEstimatorMedian snm; Param const& dc_param = getParam_().copy("algorithm:SignalToNoise:", true); snm.setParameters(dc_param); - for (MapType::Iterator it = exp.begin(); it != exp.end(); ++it) + for (Size it = 0; it != exp.size(); ++it) { - snm.init(it->begin(), it->end()); - for (MapType::SpectrumType::Iterator spec = it->begin(); spec != it->end(); ++spec) + snm.init(exp[it]); + for (Size spec = 0; spec != exp[it].size(); ++spec) { - if (snm.getSignalToNoise(spec) < sn) spec->setIntensity(0); + if (snm.getSignalToNoise(spec) < sn) exp[it][spec].setIntensity(0); } - it->erase(remove_if(it->begin(), it->end(), InIntensityRange(1, numeric_limits::max(), true)), it->end()); + exp[it].erase(remove_if(exp[it].begin(), exp[it].end(), InIntensityRange(1, numeric_limits::max(), true)), exp[it].end()); } } From b2b8728926d23591a158b56ed31ee4b9f5e16c59 Mon Sep 17 00:00:00 2001 From: chen Date: Mon, 25 May 2020 10:32:20 +0200 Subject: [PATCH 3/3] Precomputation for DIAScoring and DIAHelper --- .../OpenMS/ANALYSIS/OPENSWATH/DIAHelper.h | 12 +- .../OpenMS/ANALYSIS/OPENSWATH/DIAPrescoring.h | 10 +- .../OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h | 9 +- .../DATAREDUCTION/IsotopeDistributionCache.h | 31 ++++- .../source/ANALYSIS/OPENSWATH/DIAHelper.cpp | 25 ++-- .../ANALYSIS/OPENSWATH/DIAPrescoring.cpp | 12 +- .../source/ANALYSIS/OPENSWATH/DIAScoring.cpp | 37 ++--- .../IsotopeDistributionCache.cpp | 129 +++++++++++------- src/utils/OpenSwathDIAPreScoring.cpp | 6 +- 9 files changed, 161 insertions(+), 110 deletions(-) diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAHelper.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAHelper.h index 56d20ba439c..99e11f649c0 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAHelper.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAHelper.h @@ -36,6 +36,7 @@ #include #include +#include #include @@ -44,7 +45,6 @@ namespace OpenMS class TheoreticalSpectrumGenerator; namespace DIAHelpers { - /** @brief Helper functions for the DIA scoring of OpenSWATH */ @@ -110,14 +110,16 @@ namespace OpenMS UInt charge = 1u); /// get averagine distribution given mass - OPENMS_DLLAPI void getAveragineIsotopeDistribution(const double product_mz, + OPENMS_DLLAPI void getAveragineIsotopeDistribution( IsotopeDistributionCache& iso, + const double product_mz, std::vector >& isotopesSpec, const double charge = 1., const int nr_isotopes = 4, const double mannmass = 1.00048); /// simulate spectrum from AASequence - OPENMS_DLLAPI void simulateSpectrumFromAASequence(const AASequence& aa, + OPENMS_DLLAPI void simulateSpectrumFromAASequence(IsotopeDistributionCache& iso, + const AASequence& aa, std::vector& firstIsotopeMasses, //[out] std::vector >& isotopeMasses, //[out] TheoreticalSpectrumGenerator const * g, @@ -137,7 +139,7 @@ namespace OpenMS double charge = 1.); /// given an experimental spectrum add isotope pattern. - OPENMS_DLLAPI void addIsotopes2Spec(const std::vector >& spec, + OPENMS_DLLAPI void addIsotopes2Spec( IsotopeDistributionCache& iso, const std::vector >& spec, std::vector >& isotopeMasses, //[out] double charge = 1.); @@ -147,7 +149,7 @@ namespace OpenMS OPENMS_DLLAPI void extractFirst(const std::vector >& peaks, std::vector& mass); /// extract second from vector of pairs OPENMS_DLLAPI void extractSecond(const std::vector >& peaks, std::vector& mass); - + ///}@ } } diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAPrescoring.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAPrescoring.h index c0650de7a3d..ed5018700a7 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAPrescoring.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAPrescoring.h @@ -45,6 +45,8 @@ #include +#include + namespace OpenMS { /** @@ -60,7 +62,6 @@ namespace OpenMS and compute manhattan distance and dotprod score between spectrum intensities and simulated spectrum. */ - class OPENMS_DLLAPI DiaPrescore : public DefaultParamHandler { @@ -84,7 +85,8 @@ namespace OpenMS and compute manhattan distance and dotprod score between spectrum intensities and simulated spectrum. */ - void score(OpenSwath::SpectrumPtr spec, + void score(IsotopeDistributionCache& iso, + OpenSwath::SpectrumPtr spec, const std::vector& lt, double& dotprod, double& manhattan); @@ -93,9 +95,11 @@ namespace OpenMS @brief Compute manhattan and dotprod score for all spectra which can be accessed by the SpectrumAccessPtr for all transitions groups in the LightTargetedExperiment. */ - void operator()(OpenSwath::SpectrumAccessPtr swath_ptr, + void operator()(IsotopeDistributionCache& iso, + OpenSwath::SpectrumAccessPtr swath_ptr, OpenSwath::LightTargetedExperiment& transition_exp_used, OpenSwath::IDataFrameWriter* ivw); + }; diff --git a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h index cb94ba21e08..d50342c72c5 100644 --- a/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h +++ b/src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h @@ -42,6 +42,7 @@ #include #include #include +#include namespace OpenMS { @@ -92,7 +93,6 @@ namespace OpenMS //@} public: - ///@name Constructors and Destructor //@{ /// Default constructor @@ -146,9 +146,10 @@ namespace OpenMS const std::vector& transitions, double& dotprod, double& manhattan); - //@} -private: + IsotopeDistributionCache& getCache(); + //@} + private: /// Copy constructor (algorithm class) DIAScoring(const DIAScoring& rhs); @@ -215,6 +216,8 @@ namespace OpenMS bool dia_extraction_ppm_; bool dia_centroided_; + IsotopeDistributionCache iso_; + TheoreticalSpectrumGenerator * generator; }; } diff --git a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/IsotopeDistributionCache.h b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/IsotopeDistributionCache.h index a2f7d179781..6d339a7e961 100644 --- a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/IsotopeDistributionCache.h +++ b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/IsotopeDistributionCache.h @@ -36,27 +36,50 @@ #include #include +#include namespace OpenMS { /** * @brief Pre-calculate isotope distributions for interesting mass ranges */ - class OPENMS_DLLAPI IsotopeDistributionCache + class OPENMS_DLLAPI IsotopeDistributionCache: + public FeatureFinderAlgorithmPickedHelperStructs { public: typedef FeatureFinderAlgorithmPickedHelperStructs::TheoreticalIsotopePattern TheoreticalIsotopePattern; - IsotopeDistributionCache(double max_mass, double mass_window_width, double intensity_percentage = 0, double intensity_percentage_optional = 0); + /// @name Constructors and Destructors + //@{ + /** Default constructor + */ + IsotopeDistributionCache(); + + /// Destructor + ~IsotopeDistributionCache() = default; + //@} + + + void calculateisotopeDistribution(Size num_begin); + + void renormalize( TheoreticalIsotopePattern& isotopes, IsotopeDistribution& isotope_dist); /// Returns the isotope distribution for a certain mass window - const TheoreticalIsotopePattern & getIsotopeDistribution(double mass) const; + const TheoreticalIsotopePattern& getIsotopeDistribution(double mass) ; + + const IsotopeDistribution& getIntensity(double mass); private: /// Vector of pre-calculated isotope distributions for several mass windows std::vector isotope_distributions_; + std::vector intensity_; + double mass_window_width_; + + double intensity_percentage_ ; + + double intensity_percentage_optional_; }; -} +}//namespace OpenMS diff --git a/src/openms/source/ANALYSIS/OPENSWATH/DIAHelper.cpp b/src/openms/source/ANALYSIS/OPENSWATH/DIAHelper.cpp index d58f54a7b41..8e295d0d3e0 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/DIAHelper.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/DIAHelper.cpp @@ -39,11 +39,14 @@ #include #include +#include + #include #include #include #include +#include namespace OpenMS { @@ -252,7 +255,8 @@ namespace OpenMS } } // end getBYSeries - void getAveragineIsotopeDistribution(const double product_mz, + void getAveragineIsotopeDistribution( IsotopeDistributionCache& iso, + const double product_mz, std::vector >& isotopesSpec, const double charge, const int nr_isotopes, @@ -260,12 +264,13 @@ namespace OpenMS { typedef OpenMS::FeatureFinderAlgorithmPickedHelperStructs::TheoreticalIsotopePattern TheoreticalIsotopePattern; // create the theoretical distribution - CoarseIsotopePatternGenerator solver(nr_isotopes); - TheoreticalIsotopePattern isotopes; - auto d = solver.estimateFromPeptideWeight(product_mz * charge); +// CoarseIsotopePatternGenerator solver(nr_isotopes); +// TheoreticalIsotopePattern isotopes; +// auto d = solver.estimateFromPeptideWeight(product_mz * charge); + auto d = iso.getIntensity(product_mz * charge); double mass = product_mz; - for (IsotopeDistribution::Iterator it = d.begin(); it != d.end(); ++it) + for (IsotopeDistribution::iterator it = d.begin() ; it != d.end(); ++it) { isotopesSpec.push_back(std::make_pair(mass, it->getIntensity())); mass += mannmass; @@ -273,7 +278,8 @@ namespace OpenMS } //end of dia_isotope_corr_sub //simulate spectrum from AASequence - void simulateSpectrumFromAASequence(const AASequence& aa, + void simulateSpectrumFromAASequence(IsotopeDistributionCache& iso, + const AASequence& aa, std::vector& firstIsotopeMasses, //[out] std::vector >& isotopeMasses, //[out] TheoreticalSpectrumGenerator const * generator, double charge) @@ -281,13 +287,12 @@ namespace OpenMS getTheorMasses(aa, firstIsotopeMasses, generator, charge); for (std::size_t i = 0; i < firstIsotopeMasses.size(); ++i) { - getAveragineIsotopeDistribution(firstIsotopeMasses[i], isotopeMasses, - charge); + getAveragineIsotopeDistribution(iso, firstIsotopeMasses[i], isotopeMasses,charge); } } //given an experimental spectrum add isotope pattern. - void addIsotopes2Spec(const std::vector >& spec, + void addIsotopes2Spec( IsotopeDistributionCache& iso, const std::vector >& spec, std::vector >& isotopeMasses, //[out] double charge) { @@ -295,7 +300,7 @@ namespace OpenMS for (std::size_t i = 0; i < spec.size(); ++i) { std::vector > isotopes; - getAveragineIsotopeDistribution(spec[i].first, isotopes, charge); + getAveragineIsotopeDistribution(iso,spec[i].first, isotopes,charge); for (Size j = 0; j < isotopes.size(); ++j) { isotopes[j].second *= spec[i].second; //multiple isotope intensity by spec intensity diff --git a/src/openms/source/ANALYSIS/OPENSWATH/DIAPrescoring.cpp b/src/openms/source/ANALYSIS/OPENSWATH/DIAPrescoring.cpp index 5348c59c24e..7843e081c52 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/DIAPrescoring.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/DIAPrescoring.cpp @@ -72,7 +72,8 @@ namespace OpenMS } } - void DiaPrescore::operator()(OpenSwath::SpectrumAccessPtr swath_ptr, + void DiaPrescore::operator()(IsotopeDistributionCache& iso, + OpenSwath::SpectrumAccessPtr swath_ptr, OpenSwath::LightTargetedExperiment& transition_exp_used, OpenSwath::IDataFrameWriter* ivw) { @@ -114,7 +115,7 @@ namespace OpenMS double score1; double score2; //OpenSwath::LightPeptide pep; - score(spec, beg->second, score1, score2); + score(iso,spec, beg->second, score1, score2); score1v.push_back(score1); score2v.push_back(score2); @@ -127,7 +128,8 @@ namespace OpenMS } //end of forloop over spectra } - void DiaPrescore::score(OpenSwath::SpectrumPtr spec, + void DiaPrescore::score( IsotopeDistributionCache& iso, + OpenSwath::SpectrumPtr spec, const std::vector& lt, double& dotprod, double& manhattan) @@ -137,7 +139,7 @@ namespace OpenMS std::vector firstIstotope, theomasses; DIAHelpers::extractFirst(res, firstIstotope); std::vector > spectrum, spectrum2; - DIAHelpers::addIsotopes2Spec(res, spectrum, nr_charges_); + DIAHelpers::addIsotopes2Spec(iso,res,spectrum, nr_charges_); spectrum2.resize(spectrum.size()); std::copy(spectrum.begin(), spectrum.end(), spectrum2.begin()); //std::cout << spectrum.size() << std::endl; @@ -204,7 +206,7 @@ namespace OpenMS { } - DiaPrescore::DiaPrescore() : + DiaPrescore::DiaPrescore(): DefaultParamHandler("DIAPrescore") { defineDefaults(); diff --git a/src/openms/source/ANALYSIS/OPENSWATH/DIAScoring.cpp b/src/openms/source/ANALYSIS/OPENSWATH/DIAScoring.cpp index 5b47b777f65..63e53a536e0 100644 --- a/src/openms/source/ANALYSIS/OPENSWATH/DIAScoring.cpp +++ b/src/openms/source/ANALYSIS/OPENSWATH/DIAScoring.cpp @@ -268,7 +268,12 @@ namespace OpenMS double& dotprod, double& manhattan) { OpenMS::DiaPrescore dp(dia_extract_window_, dia_nr_isotopes_, dia_nr_charges_); - dp.score(spectrum, transitions, dotprod, manhattan); + dp.score(iso_,spectrum, transitions, dotprod, manhattan); + } + + IsotopeDistributionCache& DIAScoring::getCache() + { + return iso_; } /////////////////////////////////////////////////////////////////////////// @@ -295,6 +300,7 @@ namespace OpenMS std::vector isotopes_int; double max_ratio; int nr_occurences; + for (Size k = 0; k < transitions.size(); k++) { isotopes_int.clear(); @@ -394,37 +400,13 @@ namespace OpenMS // create the theoretical distribution from the sum formula EmpiricalFormula empf(sum_formula); isotope_dist = empf.getIsotopeDistribution(CoarseIsotopePatternGenerator(dia_nr_isotopes_)); + iso_.renormalize(isotopes, isotope_dist); } else { // create the theoretical distribution from the peptide weight - CoarseIsotopePatternGenerator solver(dia_nr_isotopes_ + 1); - isotope_dist = solver.estimateFromPeptideWeight(std::fabs(product_mz * putative_fragment_charge)); - } - - - for (IsotopeDistribution::Iterator it = isotope_dist.begin(); it != isotope_dist.end(); ++it) - { - isotopes.intensity.push_back(it->getIntensity()); + isotopes = iso_.getIsotopeDistribution(std::fabs(product_mz * putative_fragment_charge)); } - isotopes.optional_begin = 0; - isotopes.optional_end = dia_nr_isotopes_; - - // scale the distribution to a maximum of 1 - double max = 0.0; - for (Size i = 0; i < isotopes.intensity.size(); ++i) - { - if (isotopes.intensity[i] > max) - { - max = isotopes.intensity[i]; - } - } - isotopes.max = max; - for (Size i = 0; i < isotopes.intensity.size(); ++i) - { - isotopes.intensity[i] /= max; - } - isotopes.trimmed_left = 0; // score the pattern against a theoretical one double int_score = OpenSwath::cor_pearson(isotopes_int.begin(), isotopes_int.end(), isotopes.intensity.begin()); @@ -436,4 +418,5 @@ namespace OpenMS } //end of dia_isotope_corr_sub + } diff --git a/src/openms/source/FILTERING/DATAREDUCTION/IsotopeDistributionCache.cpp b/src/openms/source/FILTERING/DATAREDUCTION/IsotopeDistributionCache.cpp index 85d0eea94a4..0fce357a3e5 100644 --- a/src/openms/source/FILTERING/DATAREDUCTION/IsotopeDistributionCache.cpp +++ b/src/openms/source/FILTERING/DATAREDUCTION/IsotopeDistributionCache.cpp @@ -40,89 +40,116 @@ namespace OpenMS { - IsotopeDistributionCache::IsotopeDistributionCache(double max_mass, double mass_window_width, double intensity_percentage, double intensity_percentage_optional) : - mass_window_width_(mass_window_width) + IsotopeDistributionCache::IsotopeDistributionCache() { - Size num_isotopes = std::ceil(max_mass / mass_window_width) + 1; + mass_window_width_ = 20.0; + intensity_percentage_=0.0; + intensity_percentage_optional_ =0.0; - //reserve enough space - isotope_distributions_.resize(num_isotopes); + } + + + void IsotopeDistributionCache::calculateisotopeDistribution(OpenMS::Size num_begin) + { //calculate distribution if necessary - for (Size index = 0; index < num_isotopes; ++index) + for (Size index = num_begin; index < isotope_distributions_.size() ; ++index) { //log_ << "Calculating iso dist for mass: " << 0.5*mass_window_width_ + index * mass_window_width_ << std::endl; CoarseIsotopePatternGenerator solver(20); - auto d = solver.estimateFromPeptideWeight(0.5 * mass_window_width + index * mass_window_width); + auto d = solver.estimateFromPeptideWeight(0.5*mass_window_width_ + index * mass_window_width_); + + intensity_[index] = d; //trim left and right. And store the number of isotopes on the left, to reconstruct the monoisotopic peak Size size_before = d.size(); - d.trimLeft(intensity_percentage_optional); + d.trimLeft(intensity_percentage_optional_); isotope_distributions_[index].trimmed_left = size_before - d.size(); - d.trimRight(intensity_percentage_optional); + d.trimRight(intensity_percentage_optional_); + + + renormalize(isotope_distributions_[index], d); + + } + + } + + void IsotopeDistributionCache::renormalize( + OpenMS::IsotopeDistributionCache::TheoreticalIsotopePattern& isotopes, + OpenMS::IsotopeDistribution& isotope_dist) + { + for (IsotopeDistribution::Iterator it = isotope_dist.begin(); it != isotope_dist.end(); ++it) + { + isotopes.intensity.push_back(it->getIntensity()); + //log_ << " - " << it->second << std::endl; + } - for (IsotopeDistribution::Iterator it = d.begin(); it != d.end(); ++it) + //determine the number of optional peaks at the beginning/end + Size begin = 0; + Size end = 0; + bool is_begin = true; + bool is_end = false; + double max = 0.0; + for (Size i = 0; i < isotopes.intensity.size(); ++i) + { + if (isotopes.intensity[i] < intensity_percentage_) { - isotope_distributions_[index].intensity.push_back(it->getIntensity()); - //log_ << " - " << it->second << std::endl; + if (!is_end && !is_begin) is_end = true; + if (is_begin) ++begin; + else if (is_end) ++end; } - - //determine the number of optional peaks at the beginning/end - Size begin = 0; - Size end = 0; - bool is_begin = true; - bool is_end = false; - for (Size i = 0; i < isotope_distributions_[index].intensity.size(); ++i) + else if (is_begin) { - if (isotope_distributions_[index].intensity[i] < intensity_percentage) - { - if (!is_end && !is_begin) - is_end = true; - if (is_begin) - ++begin; - else if (is_end) - ++end; - } - else if (is_begin) - { - is_begin = false; - } + is_begin = false; } - isotope_distributions_[index].optional_begin = begin; - isotope_distributions_[index].optional_end = end; //scale the distribution to a maximum of 1 - double max = 0.0; - for (Size i = 0; i < isotope_distributions_[index].intensity.size(); ++i) + if (isotopes.intensity[i] > max) { - if (isotope_distributions_[index].intensity[i] > max) - { - max = isotope_distributions_[index].intensity[i]; - } + max = isotopes.intensity[i]; } + } + isotopes.optional_begin = begin; + isotopes.optional_end = end; - isotope_distributions_[index].max = max; + isotopes.max = max; - for (Size i = 0; i < isotope_distributions_[index].intensity.size(); ++i) - { - isotope_distributions_[index].intensity[i] /= max; - } + for (Size i = 0; i < isotopes.intensity.size(); ++i) + { + isotopes.intensity[i] /= max; } } // Returns the isotope distribution for a certain mass window - const IsotopeDistributionCache::TheoreticalIsotopePattern& IsotopeDistributionCache::getIsotopeDistribution(double mass) const + const IsotopeDistributionCache::TheoreticalIsotopePattern& IsotopeDistributionCache::getIsotopeDistribution(double mass) { - //calculate index in the vector - Size index = static_cast(std::floor(mass / mass_window_width_)); - + Size index = Size (std::floor(mass/mass_window_width_)); if (index >= isotope_distributions_.size()) { - throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "IsotopeDistribution not precalculated. Maximum allowed index is " + String(isotope_distributions_.size()), String(index)); + Size size = isotope_distributions_.size(); + isotope_distributions_.resize(index + 1); + intensity_.resize(index + 1); + calculateisotopeDistribution(size); } //Return distribution - return isotope_distributions_[index]; + return isotope_distributions_[(int)index]; + } + + //Return the intensity for a mass + const IsotopeDistribution& IsotopeDistributionCache::getIntensity(double mass) + { + Size index = Size (std::floor(mass)); + if (index >= intensity_.size()) + { + Size size = intensity_.size(); + isotope_distributions_.resize(index + 1); + intensity_.resize(index + 1); + calculateisotopeDistribution(size); + } + + //Return intensity + return intensity_[(int)index]; } } diff --git a/src/utils/OpenSwathDIAPreScoring.cpp b/src/utils/OpenSwathDIAPreScoring.cpp index 3f68a78900a..5b8550fcee9 100644 --- a/src/utils/OpenSwathDIAPreScoring.cpp +++ b/src/utils/OpenSwathDIAPreScoring.cpp @@ -48,6 +48,8 @@ #include #include +#include + #include #include @@ -145,7 +147,7 @@ class DIAPreScoring : std::cout << ltrans << std::endl; } // Here we deal with SWATH files (can be multiple files) - + DIAScoring dia; for (Size i = 0; i < file_list.size(); ++i) { MzMLFile swath_file; @@ -201,7 +203,7 @@ class DIAPreScoring : //std::cout << "using data frame writer for storing data. Outfile :" << out << std::endl; OpenSwath::IDataFrameWriter* dfw = new OpenSwath::CSVWriter(fname); OpenMS::DiaPrescore dp; - dp.operator()(spectrumAccess, transition_exp_used, dfw); + dp.operator()(dia.getCache(),spectrumAccess, transition_exp_used, dfw); delete dfw; //featureFinder.pickExperiment(chromatogram_ptr, out_featureFile, //transition_exp_used, trafo, swath_ptr, transition_group_map);