Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimization in OpenSwathWorkflow #92

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -206,12 +206,12 @@ namespace OpenMS
if (std::fabs(prev->getMZ() - RT) < std::fabs(iter->getMZ() - RT) )
{
// prev is closer to the apex
return sn_.getSignalToNoise(*prev);
return sn_.getSignalToNoise((Size) distance(chromatogram_.begin(),prev));
}
else
{
// iter is closer to the apex
return sn_.getSignalToNoise(*iter);
return sn_.getSignalToNoise((Size) distance(chromatogram_.begin(),iter));
}
}

Expand Down
12 changes: 7 additions & 5 deletions src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

#include <OpenMS/CHEMISTRY/AASequence.h>
#include <OpenMS/OPENSWATHALGO/DATAACCESS/DataStructures.h>
#include <OpenMS/FILTERING/DATAREDUCTION/IsotopeDistributionCache.h>

#include <vector>

Expand All @@ -44,7 +45,6 @@ namespace OpenMS
class TheoreticalSpectrumGenerator;
namespace DIAHelpers
{

/**
@brief Helper functions for the DIA scoring of OpenSWATH
*/
Expand Down Expand Up @@ -110,14 +110,16 @@ namespace OpenMS
UInt charge = 1u);

/// get averagine distribution given mass
OPENMS_DLLAPI void getAveragineIsotopeDistribution(const double product_mz,
OPENMS_DLLAPI void getAveragineIsotopeDistribution( IsotopeDistributionCache& iso,
const double product_mz,
std::vector<std::pair<double, double> >& isotopesSpec,
const double charge = 1.,
const int nr_isotopes = 4,
const double mannmass = 1.00048);

/// simulate spectrum from AASequence
OPENMS_DLLAPI void simulateSpectrumFromAASequence(const AASequence& aa,
OPENMS_DLLAPI void simulateSpectrumFromAASequence(IsotopeDistributionCache& iso,
const AASequence& aa,
std::vector<double>& firstIsotopeMasses, //[out]
std::vector<std::pair<double, double> >& isotopeMasses, //[out]
TheoreticalSpectrumGenerator const * g,
Expand All @@ -137,7 +139,7 @@ namespace OpenMS
double charge = 1.);

/// given an experimental spectrum add isotope pattern.
OPENMS_DLLAPI void addIsotopes2Spec(const std::vector<std::pair<double, double> >& spec,
OPENMS_DLLAPI void addIsotopes2Spec( IsotopeDistributionCache& iso, const std::vector<std::pair<double, double> >& spec,
std::vector<std::pair<double, double> >& isotopeMasses, //[out]
double charge = 1.);

Expand All @@ -147,7 +149,7 @@ namespace OpenMS
OPENMS_DLLAPI void extractFirst(const std::vector<std::pair<double, double> >& peaks, std::vector<double>& mass);
/// extract second from vector of pairs
OPENMS_DLLAPI void extractSecond(const std::vector<std::pair<double, double> >& peaks, std::vector<double>& mass);

///}@
}
}
Expand Down
10 changes: 7 additions & 3 deletions src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAPrescoring.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@

#include <OpenMS/DATASTRUCTURES/DefaultParamHandler.h>

#include <OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h>

namespace OpenMS
{
/**
Expand All @@ -60,7 +62,6 @@ namespace OpenMS
and compute manhattan distance and dotprod score between spectrum intensities
and simulated spectrum.
*/

class OPENMS_DLLAPI DiaPrescore :
public DefaultParamHandler
{
Expand All @@ -84,7 +85,8 @@ namespace OpenMS
and compute manhattan distance and dotprod score between spectrum intensities
and simulated spectrum.
*/
void score(OpenSwath::SpectrumPtr spec,
void score(IsotopeDistributionCache& iso,
OpenSwath::SpectrumPtr spec,
const std::vector<OpenSwath::LightTransition>& lt,
double& dotprod,
double& manhattan);
Expand All @@ -93,9 +95,11 @@ namespace OpenMS
@brief Compute manhattan and dotprod score for all spectra which can be accessed by
the SpectrumAccessPtr for all transitions groups in the LightTargetedExperiment.
*/
void operator()(OpenSwath::SpectrumAccessPtr swath_ptr,
void operator()(IsotopeDistributionCache& iso,
OpenSwath::SpectrumAccessPtr swath_ptr,
OpenSwath::LightTargetedExperiment& transition_exp_used,
OpenSwath::IDataFrameWriter* ivw);

};


Expand Down
9 changes: 6 additions & 3 deletions src/openms/include/OpenMS/ANALYSIS/OPENSWATH/DIAScoring.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include <OpenMS/OPENSWATHALGO/DATAACCESS/DataStructures.h>
#include <OpenMS/OPENSWATHALGO/DATAACCESS/ITransition.h>
#include <OpenMS/OPENSWATHALGO/DATAACCESS/TransitionExperiment.h>
#include <OpenMS/FILTERING/DATAREDUCTION/IsotopeDistributionCache.h>

namespace OpenMS
{
Expand Down Expand Up @@ -92,7 +93,6 @@ namespace OpenMS
//@}

public:

///@name Constructors and Destructor
//@{
/// Default constructor
Expand Down Expand Up @@ -146,9 +146,10 @@ namespace OpenMS
const std::vector<TransitionType>& transitions,
double& dotprod,
double& manhattan);
//@}

private:
IsotopeDistributionCache& getCache();
//@}
private:

/// Copy constructor (algorithm class)
DIAScoring(const DIAScoring& rhs);
Expand Down Expand Up @@ -215,6 +216,8 @@ namespace OpenMS
bool dia_extraction_ppm_;
bool dia_centroided_;

IsotopeDistributionCache iso_;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you call this iso_cache_ or something which reflects its purpose?


TheoreticalSpectrumGenerator * generator;
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,27 +36,50 @@

#include <OpenMS/KERNEL/StandardTypes.h>
#include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/FeatureFinderAlgorithmPickedHelperStructs.h>
#include <OpenMS/CHEMISTRY/ISOTOPEDISTRIBUTION/IsotopeDistribution.h>

namespace OpenMS
{
/**
* @brief Pre-calculate isotope distributions for interesting mass ranges
*/
class OPENMS_DLLAPI IsotopeDistributionCache
class OPENMS_DLLAPI IsotopeDistributionCache:
public FeatureFinderAlgorithmPickedHelperStructs
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why inherit from FeatureFinderAlgorithmPickedHelperStructs????

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sorry, I forget to delete it.

{
public:
typedef FeatureFinderAlgorithmPickedHelperStructs::TheoreticalIsotopePattern TheoreticalIsotopePattern;

IsotopeDistributionCache(double max_mass, double mass_window_width, double intensity_percentage = 0, double intensity_percentage_optional = 0);
/// @name Constructors and Destructors
//@{
/** Default constructor
*/
IsotopeDistributionCache();

/// Destructor
~IsotopeDistributionCache() = default;
//@}


void calculateisotopeDistribution(Size num_begin);

void renormalize( TheoreticalIsotopePattern& isotopes, IsotopeDistribution& isotope_dist);

/// Returns the isotope distribution for a certain mass window
const TheoreticalIsotopePattern & getIsotopeDistribution(double mass) const;
const TheoreticalIsotopePattern& getIsotopeDistribution(double mass) ;

const IsotopeDistribution& getIntensity(double mass);

private:
/// Vector of pre-calculated isotope distributions for several mass windows
std::vector<TheoreticalIsotopePattern> isotope_distributions_;

std::vector<IsotopeDistribution> intensity_;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

intensity_ is not a good name. Its a distribution...

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'll call the member distribution_cache_ instead.


double mass_window_width_;

double intensity_percentage_ ;

double intensity_percentage_optional_;
};
}
}//namespace OpenMS

Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,7 @@ namespace OpenMS
inline SignalToNoiseEstimator() :
DefaultParamHandler("SignalToNoiseEstimator"),
ProgressLogger(),
first_(),
last_(),
c(),
is_result_valid_(false)
{
}
Expand All @@ -82,8 +81,7 @@ namespace OpenMS
DefaultParamHandler(source),
ProgressLogger(source),
stn_estimates_(source.stn_estimates_),
first_(source.first_),
last_(source.last_),
c(source.c),
is_result_valid_(source.is_result_valid_)
{}

Expand All @@ -95,56 +93,30 @@ namespace OpenMS
DefaultParamHandler::operator=(source);
ProgressLogger::operator=(source);
stn_estimates_ = source.stn_estimates_;
first_ = source.first_;
last_ = source.last_;
c = source.c;
return *this;
}

/// Destructor
~SignalToNoiseEstimator() override
{}

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we stick to the rule of six, don't we have to add move constructor and move assignment operator?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have already added them (c with type Container).


/// Set the start and endpoint of the raw data interval, for which signal to noise ratios will be estimated immediately
virtual void init(const PeakIterator & it_begin, const PeakIterator & it_end)
virtual void init(const Container& c)
{
first_ = it_begin;
last_ = it_end;
computeSTN_(first_, last_);
computeSTN_(c);
is_result_valid_ = true;
}

/// Set the start and endpoint of the raw data interval, for which signal to noise ratios will be estimated immediately
virtual void init(const Container & c)
{
init(c.begin(), c.end());
}

/// Return to signal/noise estimate for data point @p data_point
/// @note the first query to this function will take longer, as
/// all SignalToNoise values are calculated
/// @note you will get a warning to stderr if more than 20% of the
/// noise estimates used sparse windows
virtual double getSignalToNoise(const PeakIterator & data_point)
{
if (!is_result_valid_)
{
// recompute ...
init(first_, last_);
}

return stn_estimates_[*data_point];
}

virtual double getSignalToNoise(const PeakType & data_point)
virtual double getSignalToNoise(const Size index)
{
if (!is_result_valid_)
{
// recompute ...
init(first_, last_);
init(c);
}

return stn_estimates_[data_point];
return stn_estimates_[index];
}

protected:
Expand All @@ -154,7 +126,7 @@ namespace OpenMS
*
* @exception Throws Exception::InvalidValue
*/
virtual void computeSTN_(const PeakIterator & scan_first_, const PeakIterator & scan_last_) = 0;
virtual void computeSTN_(const Container& c) = 0;



Expand Down Expand Up @@ -204,12 +176,9 @@ namespace OpenMS
//MEMBERS:

/// stores the noise estimate for each peak
std::map<PeakType, double, typename PeakType::PositionLess> stn_estimates_;
std::vector<double> stn_estimates_;

/// points to the first raw data point in the interval
PeakIterator first_;
/// points to the right position next to the last raw data point in the interval
PeakIterator last_;
Container c;
/// flag: set to true if SignalToNoise estimates are calculated and none of the params were changed. otherwise false.
mutable bool is_result_valid_;
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include <OpenMS/CONCEPT/Exception.h>
#include <OpenMS/DATASTRUCTURES/ListUtils.h>
#include <vector>
#include <algorithm> //for std::max_element

namespace OpenMS
{
Expand Down Expand Up @@ -76,8 +77,6 @@ namespace OpenMS
enum IntensityThresholdCalculation {MANUAL = -1, AUTOMAXBYSTDEV = 0, AUTOMAXBYPERCENT = 1};

using SignalToNoiseEstimator<Container>::stn_estimates_;
using SignalToNoiseEstimator<Container>::first_;
using SignalToNoiseEstimator<Container>::last_;
using SignalToNoiseEstimator<Container>::is_result_valid_;
using SignalToNoiseEstimator<Container>::defaults_;
using SignalToNoiseEstimator<Container>::param_;
Expand Down Expand Up @@ -168,13 +167,17 @@ namespace OpenMS
@param scan_last_ last element in the scan (disregarded)
@exception Throws Exception::InvalidValue
*/
void computeSTN_(const PeakIterator & scan_first_, const PeakIterator & scan_last_) override
void computeSTN_(const Container& c) override
{
PeakIterator scan_first_ = c.begin();
PeakIterator scan_last_ = c.end();

// reset counter for sparse windows
double sparse_window_percent = 0;

// reset the results
stn_estimates_.clear();
stn_estimates_.resize(c.size());

// maximal range of histogram needs to be calculated first
if (auto_mode_ == AUTOMAXBYSTDEV)
Expand Down Expand Up @@ -209,19 +212,21 @@ namespace OpenMS
++size;
++run;
}
// auto maxIt = std::max_element(c.begin(), c.end() ,[](const PeakType& a, const PeakType& b){ return a.getIntensity() > b.getIntensity();});
// typename PeakType::IntensityType maxInt = maxIt->getIntensity();

double bin_size = maxInt / 100;

// fill histogram
run = scan_first_;
run = scan_first_;
while (run != scan_last_)
{
++histogram_auto[(int) (((*run).getIntensity() - 1) / bin_size)];
++run;
}

// add up element counts in histogram until ?th percentile is reached
int elements_below_percentile = (int) (auto_max_percentile_ * size / 100);
int elements_below_percentile = (int) (auto_max_percentile_ * c.size() / 100);
int elements_seen = 0;
int i = -1;
run = scan_first_;
Expand Down Expand Up @@ -283,15 +288,8 @@ namespace OpenMS

double noise; // noise value of a datapoint

// determine how many elements we need to estimate (for progress estimation)
int windows_overall = 0;
PeakIterator run = scan_first_;
while (run != scan_last_)
{
++windows_overall;
++run;
}
SignalToNoiseEstimator<Container>::startProgress(0, windows_overall, "noise estimation of data");

SignalToNoiseEstimator<Container>::startProgress(0, c.size(), "noise estimation of data");

// MAIN LOOP
while (window_pos_center != scan_last_)
Expand Down Expand Up @@ -370,7 +368,7 @@ namespace OpenMS
}

// store result
stn_estimates_[*window_pos_center] = (*window_pos_center).getIntensity() / noise;
stn_estimates_[window_count] = (*window_pos_center).getIntensity() / noise;



Expand Down
Loading