Skip to content

Commit

Permalink
Resampler: allow combination of ppm-binning without alignment (used t…
Browse files Browse the repository at this point in the history
…o result in interpreting bins as Da/Th).

Rewrite some code, so make it more robust and re-useable.
Added test.
  • Loading branch information
cbielow committed Nov 23, 2023
1 parent 3e45e6a commit 3640aef
Show file tree
Hide file tree
Showing 4 changed files with 217 additions and 62 deletions.
4 changes: 4 additions & 0 deletions src/tests/topp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1926,6 +1926,10 @@ if(WITH_GUI)
add_test("TOPP_Resampler_1" ${TOPP_BIN_PATH}/Resampler -test -in ${DATA_DIR_TOPP}/Resampler_1_input.mzML -out Resampler.mzML -sampling_rate 0.3)
add_test("TOPP_Resampler_1_out1" ${DIFF} -whitelist ${INDEX_WHITELIST} -in1 Resampler.mzML -in2 ${DATA_DIR_TOPP}/Resampler_1_output.mzML )
set_tests_properties("TOPP_Resampler_1_out1" PROPERTIES DEPENDS "TOPP_Resampler_1")

add_test("TOPP_Resampler_2" ${TOPP_BIN_PATH}/Resampler -test -in ${DATA_DIR_TOPP}/Resampler_1_input.mzML -out Resampler2_ppm.mzML -ppm -sampling_rate 2000)
add_test("TOPP_Resampler_2_out1" ${DIFF} -whitelist ${INDEX_WHITELIST} -in1 Resampler2_ppm.mzML -in2 ${DATA_DIR_TOPP}/Resampler_2_output.mzML )
set_tests_properties("TOPP_Resampler_2_out1" PROPERTIES DEPENDS "TOPP_Resampler_2")
endif()

#------------------------------------------------------------------------------
Expand Down
8 changes: 4 additions & 4 deletions src/tests/topp/Resampler_1_output.mzML
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@
<cvParam cvRef="MS" accession="MS:1000515" name="intensity array" unitAccession="MS:1000131" unitName="number of detector counts" unitCvRef="MS"/>
<cvParam cvRef="MS" accession="MS:1000521" name="32-bit float" />
<cvParam cvRef="MS" accession="MS:1000576" name="no compression" />
<binary>wP0jQ4WxREN5qcZDyP11Q/3+FEODsBpDAKGaQgAAAAAr/59Ci67+QoD5z0JpAAFECFZfRKBSLUN7pvhCFZz1QZSfCUW8T2xGxtDARn87aEXT1LVFrNURRiqqZ0Upf+tEeEIXRs4Mf0VJJVFEbL30RIHK/kTlFSRDi1JMQ+hTC0RnL75DXVR5Q2zPWkTbWyhEdVD7QvqpgkP71z9EdNMsRANRCkOd/rtDZQUdQwAAAAAAAAAA</binary>
<binary>wP0jQ4WxREN5qcZDyP11Q/3+FEODsBpDAKGaQgAAAAAr/59Ci67+QoD5z0JpAAFECFZfRKBSLUN7pvhCFZz1QZSfCUW8T2xGxtDARn87aEXT1LVFrNURRiqqZ0Upf+tEeEIXRs4Mf0VJJVFEbb30RIHK/kTlFSRDi1JMQ+hTC0RnL75DXVR5Q2zPWkTbWyhEdVD7QvupgkP71z9EdNMsRANRCkOd/rtDZQUdQwAAAAAAAAAA</binary>
</binaryDataArray>
</binaryDataArrayList>
</spectrum>
Expand All @@ -129,7 +129,7 @@
<cvParam cvRef="MS" accession="MS:1000515" name="intensity array" unitAccession="MS:1000131" unitName="number of detector counts" unitCvRef="MS"/>
<cvParam cvRef="MS" accession="MS:1000521" name="32-bit float" />
<cvParam cvRef="MS" accession="MS:1000576" name="no compression" />
<binary>wP0jQ4WxREN5qcZDyP11Q/3+FEODsBpDAKGaQgAAAAAr/59Ci67+QoD5z0JpAAFECFZfRKBSLUN7pvhCFZz1QZSfCUW8T2xGxtDARn87aEXT1LVFrNURRiqqZ0Upf+tEeEIXRs4Mf0VJJVFEbL30RIHK/kTlFSRDi1JMQ+hTC0RnL75DXVR5Q2zPWkTbWyhEdVD7QvqpgkP71z9EdNMsRANRCkOd/rtDZQUdQwAAAAAAAAAA</binary>
<binary>wP0jQ4WxREN5qcZDyP11Q/3+FEODsBpDAKGaQgAAAAAr/59Ci67+QoD5z0JpAAFECFZfRKBSLUN7pvhCFZz1QZSfCUW8T2xGxtDARn87aEXT1LVFrNURRiqqZ0Upf+tEeEIXRs4Mf0VJJVFEbb30RIHK/kTlFSRDi1JMQ+hTC0RnL75DXVR5Q2zPWkTbWyhEdVD7QvupgkP71z9EdNMsRANRCkOd/rtDZQUdQwAAAAAAAAAA</binary>
</binaryDataArray>
</binaryDataArrayList>
</spectrum>
Expand Down Expand Up @@ -170,7 +170,7 @@
<cvParam cvRef="MS" accession="MS:1000515" name="intensity array" unitAccession="MS:1000131" unitName="number of detector counts" unitCvRef="MS"/>
<cvParam cvRef="MS" accession="MS:1000521" name="32-bit float" />
<cvParam cvRef="MS" accession="MS:1000576" name="no compression" />
<binary>wP0jQ4WxREN5qcZDyP11Q/3+FEODsBpDAKGaQgAAAAAr/59Ci67+QoD5z0JpAAFECFZfRKBSLUN7pvhCFZz1QZSfCUW8T2xGxtDARn87aEXT1LVFrNURRiqqZ0Upf+tEeEIXRs4Mf0VJJVFEbL30RIHK/kTlFSRDi1JMQ+hTC0RnL75DXVR5Q2zPWkTbWyhEdVD7QvqpgkP71z9EdNMsRANRCkOd/rtDZQUdQwAAAAAAAAAA</binary>
<binary>wP0jQ4WxREN5qcZDyP11Q/3+FEODsBpDAKGaQgAAAAAr/59Ci67+QoD5z0JpAAFECFZfRKBSLUN7pvhCFZz1QZSfCUW8T2xGxtDARn87aEXT1LVFrNURRiqqZ0Upf+tEeEIXRs4Mf0VJJVFEbb30RIHK/kTlFSRDi1JMQ+hTC0RnL75DXVR5Q2zPWkTbWyhEdVD7QvupgkP71z9EdNMsRANRCkOd/rtDZQUdQwAAAAAAAAAA</binary>
</binaryDataArray>
</binaryDataArrayList>
</spectrum>
Expand All @@ -186,4 +186,4 @@
</indexList>
<indexListOffset>12669</indexListOffset>
<fileChecksum>0</fileChecksum>
</indexedmzML>
</indexedmzML>
189 changes: 189 additions & 0 deletions src/tests/topp/Resampler_2_output.mzML
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<indexedmzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0_idx.xsd">
<mzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd" accession="" version="1.1.0">
<cvList count="5">
<cv id="MS" fullName="Proteomics Standards Initiative Mass Spectrometry Ontology" URI="http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo"/>
<cv id="UO" fullName="Unit Ontology" URI="http://obo.cvs.sourceforge.net/obo/obo/ontology/phenotype/unit.obo"/>
<cv id="BTO" fullName="BrendaTissue545" version="unknown" URI="http://www.brenda-enzymes.info/ontology/tissue/tree/update/update_files/BrendaTissueOBO"/>
<cv id="GO" fullName="Gene Ontology - Slim Versions" version="unknown" URI="http://www.geneontology.org/GO_slims/goslim_goa.obo"/>
<cv id="PATO" fullName="Quality ontology" version="unknown" URI="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/quality.obo"/>
</cvList>
<fileDescription>
<fileContent>
<cvParam cvRef="MS" accession="MS:1000294" name="mass spectrum" />
</fileContent>
<contact>
<cvParam cvRef="MS" accession="MS:1000586" name="contact name" value=", " />
<cvParam cvRef="MS" accession="MS:1000590" name="contact affiliation" value="" />
</contact>
</fileDescription>
<sampleList count="1">
<sample id="sa_0" name="">
<cvParam cvRef="MS" accession="MS:1000004" name="sample mass" value="0" unitAccession="UO:0000021" unitName="gram" unitCvRef="UO" />
<cvParam cvRef="MS" accession="MS:1000005" name="sample volume" value="0" unitAccession="UO:0000098" unitName="milliliter" unitCvRef="UO" />
<cvParam cvRef="MS" accession="MS:1000006" name="sample concentration" value="0" unitAccession="UO:0000175" unitName="gram per liter" unitCvRef="UO" />
</sample>
</sampleList>
<softwareList count="5">
<software id="so_in_0" version="" >
<cvParam cvRef="MS" accession="MS:1000799" name="custom unreleased software tool" value="" />
</software>
<software id="so_default" version="" >
<cvParam cvRef="MS" accession="MS:1000799" name="custom unreleased software tool" value="" />
</software>
<software id="so_dp_sp_0_pm_0" version="" >
<cvParam cvRef="MS" accession="MS:1000799" name="custom unreleased software tool" value="" />
</software>
<software id="so_dp_sp_0_pm_1" version="version_string" >
<cvParam cvRef="MS" accession="MS:1000756" name="FileConverter" />
</software>
<software id="so_dp_sp_0_pm_2" version="version_string" >
<cvParam cvRef="MS" accession="MS:1000764" name="Resampler" />
</software>
</softwareList>
<instrumentConfigurationList count="1">
<instrumentConfiguration id="ic_0">
<cvParam cvRef="MS" accession="MS:1000031" name="instrument model" />
<componentList count="3">
<source order="0">
<cvParam cvRef="MS" accession="MS:1000008" name="ionization type" />
</source>
<analyzer order="0">
<cvParam cvRef="MS" accession="MS:1000014" name="accuracy" value="0" unitAccession="UO:0000169" unitName="parts per million" unitCvRef="UO" />
<cvParam cvRef="MS" accession="MS:1000022" name="TOF Total Path Length" value="0" unitAccession="UO:0000008" unitName="meter" unitCvRef="UO" />
<cvParam cvRef="MS" accession="MS:1000024" name="final MS exponent" value="0" />
<cvParam cvRef="MS" accession="MS:1000025" name="magnetic field strength" value="0" unitAccession="UO:0000228" unitName="tesla" unitCvRef="UO" />
<cvParam cvRef="MS" accession="MS:1000443" name="mass analyzer type" />
</analyzer>
<detector order="0">
<cvParam cvRef="MS" accession="MS:1000028" name="detector resolution" value="0" />
<cvParam cvRef="MS" accession="MS:1000029" name="sampling frequency" value="0" unitAccession="UO:0000106" unitName="hertz" unitCvRef="UO" />
<cvParam cvRef="MS" accession="MS:1000026" name="detector type" />
</detector>
</componentList>
<softwareRef ref="so_in_0" />
</instrumentConfiguration>
</instrumentConfigurationList>
<dataProcessingList count="1">
<dataProcessing id="dp_sp_0">
<processingMethod order="0" softwareRef="so_dp_sp_0_pm_0">
<cvParam cvRef="MS" accession="MS:1000543" name="data processing action" />
</processingMethod>
<processingMethod order="0" softwareRef="so_dp_sp_0_pm_1">
<cvParam cvRef="MS" accession="MS:1000544" name="Conversion to mzML" />
<cvParam cvRef="MS" accession="MS:1000747" name="completion time" value="1999-12-31+23:59" />
<userParam name="parameter: mode" type="xsd:string" value="test_mode"/>
</processingMethod>
<processingMethod order="0" softwareRef="so_dp_sp_0_pm_2">
<cvParam cvRef="MS" accession="MS:1000543" name="data processing action" />
<cvParam cvRef="MS" accession="MS:1000747" name="completion time" value="1999-12-31+23:59" />
<userParam name="parameter: mode" type="xsd:string" value="test_mode"/>
</processingMethod>
</dataProcessing>
</dataProcessingList>
<run id="ru_0" defaultInstrumentConfigurationRef="ic_0" sampleRef="sa_0">
<spectrumList count="3" defaultDataProcessingRef="dp_sp_0">
<spectrum id="spectrum=1" index="0" defaultArrayLength="7" dataProcessingRef="dp_sp_0">
<cvParam cvRef="MS" accession="MS:1000525" name="spectrum representation" />
<cvParam cvRef="MS" accession="MS:1000511" name="ms level" value="1" />
<cvParam cvRef="MS" accession="MS:1000294" name="mass spectrum" />
<scanList count="1">
<cvParam cvRef="MS" accession="MS:1000795" name="no combination" />
<scan >
<cvParam cvRef="MS" accession="MS:1000016" name="scan start time" value="474.558" unitAccession="UO:0000010" unitName="second" unitCvRef="UO" />
</scan>
</scanList>
<binaryDataArrayList count="2">
<binaryDataArray encodedLength="76">
<cvParam cvRef="MS" accession="MS:1000514" name="m/z array" unitAccession="MS:1000040" unitName="m/z" unitCvRef="MS" />
<cvParam cvRef="MS" accession="MS:1000523" name="64-bit float" />
<cvParam cvRef="MS" accession="MS:1000576" name="no compression" />
<binary>AAAAAAAgjUD0/dR46S6NQIusNZTaPY1A+sUKVtNMjUDWBD/C01uNQBolv9zbao1ALeV5qet5jUA=</binary>
</binaryDataArray>
<binaryDataArray encodedLength="40">
<cvParam cvRef="MS" accession="MS:1000515" name="intensity array" unitAccession="MS:1000131" unitName="number of detector counts" unitCvRef="MS"/>
<cvParam cvRef="MS" accession="MS:1000521" name="32-bit float" />
<cvParam cvRef="MS" accession="MS:1000576" name="no compression" />
<binary>WOdKRFBrZUSDeA9GT5xMRy+it0Z593VFbFtTRQ==</binary>
</binaryDataArray>
</binaryDataArrayList>
</spectrum>
<spectrum id="spectrum=2" index="1" defaultArrayLength="7">
<cvParam cvRef="MS" accession="MS:1000525" name="spectrum representation" />
<cvParam cvRef="MS" accession="MS:1000511" name="ms level" value="1" />
<cvParam cvRef="MS" accession="MS:1000294" name="mass spectrum" />
<scanList count="1">
<cvParam cvRef="MS" accession="MS:1000795" name="no combination" />
<scan >
<cvParam cvRef="MS" accession="MS:1000016" name="scan start time" value="475.321" unitAccession="UO:0000010" unitName="second" unitCvRef="UO" />
</scan>
</scanList>
<binaryDataArrayList count="2">
<binaryDataArray encodedLength="76">
<cvParam cvRef="MS" accession="MS:1000514" name="m/z array" unitAccession="MS:1000040" unitName="m/z" unitCvRef="MS" />
<cvParam cvRef="MS" accession="MS:1000523" name="64-bit float" />
<cvParam cvRef="MS" accession="MS:1000576" name="no compression" />
<binary>AAAAAAAgjUD0/dR46S6NQIusNZTaPY1A+sUKVtNMjUDWBD/C01uNQBolv9zbao1ALeV5qet5jUA=</binary>
</binaryDataArray>
<binaryDataArray encodedLength="40">
<cvParam cvRef="MS" accession="MS:1000515" name="intensity array" unitAccession="MS:1000131" unitName="number of detector counts" unitCvRef="MS"/>
<cvParam cvRef="MS" accession="MS:1000521" name="32-bit float" />
<cvParam cvRef="MS" accession="MS:1000576" name="no compression" />
<binary>WOdKRFBrZUSDeA9GT5xMRy+it0Z593VFbFtTRQ==</binary>
</binaryDataArray>
</binaryDataArrayList>
</spectrum>
<spectrum id="spectrum=3" index="2" defaultArrayLength="7">
<cvParam cvRef="MS" accession="MS:1000525" name="spectrum representation" />
<cvParam cvRef="MS" accession="MS:1000511" name="ms level" value="2" />
<cvParam cvRef="MS" accession="MS:1000294" name="mass spectrum" />
<scanList count="1">
<cvParam cvRef="MS" accession="MS:1000795" name="no combination" />
<scan >
<cvParam cvRef="MS" accession="MS:1000016" name="scan start time" value="475.321" unitAccession="UO:0000010" unitName="second" unitCvRef="UO" />
</scan>
</scanList>
<precursorList count="1">
<precursor>
<isolationWindow>
<cvParam cvRef="MS" accession="MS:1000827" name="isolation window target m/z" value="940.368" unitAccession="MS:1000040" unitName="m/z" unitCvRef="MS" />
</isolationWindow>
<selectedIonList count="1">
<selectedIon>
<cvParam cvRef="MS" accession="MS:1000744" name="selected ion m/z" value="940.368" unitAccession="MS:1000040" unitName="m/z" unitCvRef="MS" />
<cvParam cvRef="MS" accession="MS:1000042" name="peak intensity" value="7817" unitAccession="MS:1000132" unitName="percent of base peak" unitCvRef="MS" />
</selectedIon>
</selectedIonList>
<activation>
<cvParam cvRef="MS" accession="MS:1000044" name="dissociation method" />
</activation>
</precursor>
</precursorList>
<binaryDataArrayList count="2">
<binaryDataArray encodedLength="76">
<cvParam cvRef="MS" accession="MS:1000514" name="m/z array" unitAccession="MS:1000040" unitName="m/z" unitCvRef="MS" />
<cvParam cvRef="MS" accession="MS:1000523" name="64-bit float" />
<cvParam cvRef="MS" accession="MS:1000576" name="no compression" />
<binary>AAAAAAAgjUD0/dR46S6NQIusNZTaPY1A+sUKVtNMjUDWBD/C01uNQBolv9zbao1ALeV5qet5jUA=</binary>
</binaryDataArray>
<binaryDataArray encodedLength="40">
<cvParam cvRef="MS" accession="MS:1000515" name="intensity array" unitAccession="MS:1000131" unitName="number of detector counts" unitCvRef="MS"/>
<cvParam cvRef="MS" accession="MS:1000521" name="32-bit float" />
<cvParam cvRef="MS" accession="MS:1000576" name="no compression" />
<binary>WOdKRFBrZUSDeA9GT5xMRy+it0Z593VFbFtTRQ==</binary>
</binaryDataArray>
</binaryDataArrayList>
</spectrum>
</spectrumList>
</run>
</mzML>
<indexList count="1">
<index name="spectrum">
<offset idRef="spectrum=1">5378</offset>
<offset idRef="spectrum=2">6933</offset>
<offset idRef="spectrum=3">8460</offset>
</index>
</indexList>
<indexListOffset>10848</indexListOffset>
<fileChecksum>0</fileChecksum>
</indexedmzML>
78 changes: 20 additions & 58 deletions src/topp/Resampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
#include <OpenMS/FORMAT/FileHandler.h>
#include <OpenMS/APPLICATIONS/TOPPBase.h>
#include <OpenMS/VISUAL/MultiGradient.h>
#include <OpenMS/FILTERING/TRANSFORMERS/LinearResampler.h>
#include <OpenMS/FILTERING/TRANSFORMERS/LinearResamplerAlign.h>
#include <OpenMS/FILTERING/TRANSFORMERS/ThresholdMower.h>

#include <QtGui/QImage>

Expand Down Expand Up @@ -69,10 +69,10 @@ class TOPPResampler :
void registerOptionsAndFlags_() override
{
registerInputFile_("in", "<file>", "", "input file ");
setValidFormats_("in", ListUtils::create<String>("mzML"));
registerOutputFile_("out", "<file>", "",
"output file in mzML format");
setValidFormats_("out", ListUtils::create<String>("mzML"));
setValidFormats_("in", {"mzML"});

registerOutputFile_("out", "<file>", "", "output file in mzML format");
setValidFormats_("out", {"mzML"});

registerDoubleOption_("sampling_rate", "<rate>", 0.1,
"New sampling rate in m/z dimension (in Th unless ppm flag is set)", false);
Expand All @@ -98,83 +98,45 @@ class TOPPResampler :
bool align_sampling = getFlag_("align_sampling");
bool ppm = getFlag_("ppm");
PeakMap exp;
exp.updateRanges();

FileHandler().loadExperiment(in, exp, {FileTypes::MZML}, log_type_);

Param resampler_param;
resampler_param.setValue("spacing", sampling_rate);
if (ppm) resampler_param.setValue("ppm", "true");
else resampler_param.setValue("ppm", "false");
resampler_param.setValue("ppm", ppm ? "true" : "false");

LinearResamplerAlign lin_resampler; // LinearResampler does not know about ppm!
lin_resampler.setParameters(resampler_param);
if (!align_sampling)
{
LinearResampler lin_resampler;
lin_resampler.setParameters(resampler_param);

// resample every scan
for (Size i = 0; i < exp.size(); ++i)
{
lin_resampler.raster(exp[i]);
}
}
else
else if(!exp.RangeRT::isEmpty())
{
LinearResamplerAlign lin_resampler;
lin_resampler.setParameters(resampler_param);

bool start_pos_set = false;
bool end_pos_set = false;
double start_pos = 0.0;
double end_pos = 0.0;
// get max / min positions across whole map
for (Size i = 0; i < exp.size(); ++i)
{
if (!exp[i].empty() && (!start_pos_set || exp[i][0].getMZ() < start_pos) )
{
start_pos = exp[i][0].getMZ();
start_pos_set = true;
}
if (!exp[i].empty() && (!end_pos_set || exp[i].back().getMZ() > end_pos) )
{
end_pos = exp[i].back().getMZ();
end_pos_set = true;
}
}
// start with even position
auto start_pos = floor(exp.getMinRT());

if (start_pos_set)
// resample every scan
for (Size i = 0; i < exp.size(); ++i)
{
// start with even position
start_pos = std::floor(start_pos);

// resample every scan
for (Size i = 0; i < exp.size(); ++i)
{
lin_resampler.raster_align(exp[i], start_pos, end_pos);
}
lin_resampler.raster_align(exp[i], start_pos, exp.getMaxRT());
}
}

if (min_int_cutoff >= 0.0)
{
for (Size i = 0; i < exp.size(); ++i)
{
MSSpectrum tmp = exp[i];
tmp.clear(false);
for (Size j = 0; j < exp[i].size(); j++)
{
if (exp[i][j].getIntensity() > min_int_cutoff)
{
tmp.push_back(exp[i][j]);
}
}
// swap
exp[i] = tmp;
}
ThresholdMower mow;
Param p;
p.setValue("threshold", min_int_cutoff);
mow.setParameters(p);
mow.filterPeakMap(exp);
}

//clear meta data because they are no longer meaningful
exp.clearMetaDataArrays();

//annotate output with data processing info
addDataProcessing_(exp,
getProcessingInfo_(DataProcessing::DATA_PROCESSING));
Expand Down

0 comments on commit 3640aef

Please sign in to comment.